62 #include "llvm/IR/IntrinsicsAArch64.h"
97 #define DEBUG_TYPE "aarch64-lower"
100 STATISTIC(NumShiftInserts,
"Number of vector shift inserts");
101 STATISTIC(NumOptimizedImms,
"Number of times immediates were optimized");
108 cl::desc(
"Allow AArch64 Local Dynamic TLS code generation"),
113 cl::desc(
"Enable AArch64 logical imm instruction "
123 cl::desc(
"Combine extends of AArch64 masked "
124 "gather intrinsics"),
172 "Expected scalable predicate vector type!");
194 "Expected legal vector type!");
288 if (Subtarget->
hasSVE()) {
316 if (useSVEForFixedLengthVectorVT(VT))
320 if (useSVEForFixedLengthVectorVT(VT))
// Helper macros that register the names of the outline-atomic library calls.
//
// LCALLNAMES(A, B, N) expands to four setLibcallName() calls, one for each of
// the suffixes _RELAX, _ACQ, _REL and _ACQ_REL. The first argument of each
// call is the identifier built by token-pasting A, N and the upper-case
// suffix; the second is the string made by stringizing B and N and appending
// the lower-case suffix (adjacent string literals are concatenated). For
// example, LCALLNAMES(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas, 1) produces
//   setLibcallName(RTLIB::OUTLINE_ATOMIC_CAS1_RELAX, "__aarch64_cas1_relax");
// and likewise for the remaining three suffixes.
// NOTE(review): N is presumably the access size in bytes and the suffixes
// presumably name memory orderings -- confirm against the RTLIB definitions.
761 #define LCALLNAMES(A, B, N) \
762 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
763 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
764 setLibcallName(A##N##_REL, #B #N "_rel"); \
765 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
// LCALLNAME4(A, B): instantiates LCALLNAMES for N = 1, 2, 4 and 8.
766 #define LCALLNAME4(A, B) \
767 LCALLNAMES(A, B, 1) \
768 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
// LCALLNAME5(A, B): same as LCALLNAME4 but additionally covers N = 16.
769 #define LCALLNAME5(A, B) \
770 LCALLNAMES(A, B, 1) \
771 LCALLNAMES(A, B, 2) \
772 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
// Register the names: CAS uses the five-size variant (1..16); SWP, LDADD,
// LDSET, LDCLR and LDEOR use the four-size variant (1..8).
773 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
774 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
775 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
776 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
777 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
778 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
1150 if (Subtarget->
hasSVE()) {
1314 if (useSVEForFixedLengthVectorVT(VT))
1315 addTypeForFixedLengthSVE(VT);
1317 if (useSVEForFixedLengthVectorVT(VT))
1318 addTypeForFixedLengthSVE(VT);
1400 void AArch64TargetLowering::addTypeForNEON(
MVT VT) {
1469 for (
unsigned Opcode :
1482 void AArch64TargetLowering::addTypeForFixedLengthSVE(
MVT VT) {
1509 while (InnerVT != VT) {
1600 void AArch64TargetLowering::addDRTypeForNEON(
MVT VT) {
1605 void AArch64TargetLowering::addQRTypeForNEON(
MVT VT) {
1620 const APInt &Demanded,
1623 uint64_t OldImm = Imm, NewImm, Enc;
1624 uint64_t
Mask = ((uint64_t)(-1LL) >> (64 -
Size)), OrigMask =
Mask;
1628 if (Imm == 0 || Imm ==
Mask ||
1632 unsigned EltSize =
Size;
1648 uint64_t RotatedImm =
1649 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1651 uint64_t Sum = RotatedImm + NonDemandedBits;
1652 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1653 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1654 NewImm = (Imm | Ones) &
Mask;
1668 uint64_t
Hi = Imm >> EltSize, DemandedBitsHi =
DemandedBits >> EltSize;
1682 while (EltSize <
Size) {
1683 NewImm |= NewImm << EltSize;
1689 "demanded bits should never be altered");
1690 assert(OldImm != NewImm &&
"the new imm shouldn't be equal to the old imm");
1693 EVT VT =
Op.getValueType();
1699 if (NewImm == 0 || NewImm == OrigMask) {
1724 EVT VT =
Op.getValueType();
1730 "i32 or i64 is expected after legalization.");
1737 switch (
Op.getOpcode()) {
1741 NewOpc =
Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1744 NewOpc =
Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1747 NewOpc =
Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1753 uint64_t Imm =
C->getZExtValue();
1762 switch (
Op.getOpcode()) {
1785 case Intrinsic::aarch64_ldaxr:
1786 case Intrinsic::aarch64_ldxr: {
1788 EVT VT = cast<MemIntrinsicSDNode>(
Op)->getMemoryVT();
1798 unsigned IntNo = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
1802 case Intrinsic::aarch64_neon_umaxv:
1803 case Intrinsic::aarch64_neon_uminv: {
1808 MVT VT =
Op.getOperand(1).getValueType().getSimpleVT();
1888 #define MAKE_CASE(V) \
2197 Register DestReg =
MI.getOperand(0).getReg();
2198 Register IfTrueReg =
MI.getOperand(1).getReg();
2199 Register IfFalseReg =
MI.getOperand(2).getReg();
2200 unsigned CondCode =
MI.getOperand(3).getImm();
2201 bool NZCVKilled =
MI.getOperand(4).isKill();
2232 MI.eraseFromParent();
2239 BB->getParent()->getFunction().getPersonalityFn())) &&
2240 "SEH does not use catchret!");
2246 switch (
MI.getOpcode()) {
2253 case AArch64::F128CSEL:
2256 case TargetOpcode::STACKMAP:
2257 case TargetOpcode::PATCHPOINT:
2258 case TargetOpcode::STATEPOINT:
2285 N =
N->getOperand(0).getNode();
2293 auto Opnd0 =
N->getOperand(0);
2294 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2295 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2296 return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
2455 bool IsLegal = (
C >> 12 == 0) || ((
C & 0xFFFULL) == 0 &&
C >> 24 == 0);
2457 <<
" legal: " << (IsLegal ?
"yes\n" :
"no\n"));
2481 assert(VT !=
MVT::f16 &&
"Lowering of strict fp16 not yet implemented");
2490 const bool FullFP16 =
2509 if (
isCMN(RHS, CC)) {
2513 }
else if (
isCMN(LHS, CC)) {
2598 unsigned Opcode = 0;
2599 const bool FullFP16 =
2624 return DAG.
getNode(Opcode,
DL,
MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2642 bool &MustBeFirst,
bool WillNegate,
2643 unsigned Depth = 0) {
2651 MustBeFirst =
false;
2658 bool IsOR = Opcode ==
ISD::OR;
2670 if (MustBeFirstL && MustBeFirstR)
2676 if (!CanNegateL && !CanNegateR)
2680 CanNegate = WillNegate && CanNegateL && CanNegateR;
2683 MustBeFirst = !CanNegate;
2688 MustBeFirst = MustBeFirstL || MustBeFirstR;
2746 bool IsOR = Opcode ==
ISD::OR;
2752 assert(ValidL &&
"Valid conjunction/disjunction tree");
2759 assert(ValidR &&
"Valid conjunction/disjunction tree");
2764 assert(!MustBeFirstR &&
"Valid conjunction/disjunction tree");
2773 bool NegateAfterAll;
2777 assert(CanNegateR &&
"at least one side must be negatable");
2778 assert(!MustBeFirstR &&
"invalid conjunction/disjunction tree");
2782 NegateAfterR =
true;
2785 NegateR = CanNegateR;
2786 NegateAfterR = !CanNegateR;
2789 NegateAfterAll = !Negate;
2791 assert(Opcode ==
ISD::AND &&
"Valid conjunction/disjunction tree");
2792 assert(!Negate &&
"Valid conjunction/disjunction tree");
2796 NegateAfterR =
false;
2797 NegateAfterAll =
false;
2817 bool DummyCanNegate;
2818 bool DummyMustBeFirst;
2830 auto isSupportedExtend = [&](
SDValue V) {
2835 if (
ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2836 uint64_t
Mask = MaskCst->getZExtValue();
2837 return (
Mask == 0xFF ||
Mask == 0xFFFF ||
Mask == 0xFFFFFFFF);
2843 if (!
Op.hasOneUse())
2846 if (isSupportedExtend(
Op))
2849 unsigned Opc =
Op.getOpcode();
2851 if (
ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(
Op.getOperand(1))) {
2852 uint64_t
Shift = ShiftCst->getZExtValue();
2853 if (isSupportedExtend(
Op.getOperand(0)))
2854 return (
Shift <= 4) ? 2 : 1;
2855 EVT VT =
Op.getValueType();
2868 uint64_t
C = RHSC->getZExtValue();
2876 if ((VT ==
MVT::i32 &&
C != 0x80000000 &&
2878 (VT ==
MVT::i64 &&
C != 0x80000000ULL &&
2897 if ((VT ==
MVT::i32 &&
C != INT32_MAX &&
2908 if ((VT ==
MVT::i32 &&
C != UINT32_MAX &&
2931 if (!isa<ConstantSDNode>(RHS) ||
2962 if ((RHSC->
getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2963 cast<LoadSDNode>(LHS)->getExtensionType() ==
ISD::ZEXTLOAD &&
2964 cast<LoadSDNode>(LHS)->getMemoryVT() ==
MVT::i16 &&
2966 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2994 static std::pair<SDValue, SDValue>
2997 "Unsupported value type");
3003 switch (
Op.getOpcode()) {
3079 Overflow =
Value.getValue(1);
3081 return std::make_pair(
Value, Overflow);
3085 if (useSVEForFixedLengthVectorVT(
Op.getValueType()))
3086 return LowerToScalableOp(
Op, DAG);
3140 if (!CFVal || !CTVal)
3168 EVT VT =
Op.getValueType();
3177 bool ExtraOp =
false;
3178 switch (
Op.getOpcode()) {
3236 unsigned IsWrite = cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue();
3237 unsigned Locality = cast<ConstantSDNode>(
Op.getOperand(3))->getZExtValue();
3238 unsigned IsData = cast<ConstantSDNode>(
Op.getOperand(4))->getZExtValue();
3240 bool IsStream = !Locality;
3244 assert(Locality <= 3 &&
"Prefetch locality out-of-range");
3248 Locality = 3 - Locality;
3252 unsigned PrfOp = (IsWrite << 4) |
3262 EVT VT =
Op.getValueType();
3266 if (useSVEForFixedLengthVectorVT(VT))
3267 return LowerFixedLengthFPExtendToSVE(
Op, DAG);
3275 if (
Op.getValueType().isScalableVector())
3278 bool IsStrict =
Op->isStrictFPOpcode();
3279 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
3282 if (useSVEForFixedLengthVectorVT(SrcVT))
3283 return LowerFixedLengthFPRoundToSVE(
Op, DAG);
3287 if (useSVEForFixedLengthVectorVT(SrcVT))
3302 EVT InVT =
Op.getOperand(0).getValueType();
3303 EVT VT =
Op.getValueType();
3309 return LowerToPredicatedOp(
Op, DAG, Opcode);
3312 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3313 return LowerFixedLengthFPToIntToSVE(
Op, DAG);
3323 Op.getOpcode(), dl,
Op.getValueType(),
3329 if (VTSize < InVTSize) {
3337 if (VTSize > InVTSize) {
3352 bool IsStrict =
Op->isStrictFPOpcode();
3353 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
3356 return LowerVectorFP_TO_INT(
Op, DAG);
3360 assert(!IsStrict &&
"Lowering of strict fp16 not yet implemented");
3363 Op.getOpcode(), dl,
Op.getValueType(),
3382 EVT DstVT =
Op.getValueType();
3384 EVT SatVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
3387 assert(SatWidth <= DstWidth &&
"Saturation width cannot exceed result width");
3394 if (SatWidth != DstWidth)
3418 EVT VT =
Op.getValueType();
3421 EVT InVT =
In.getValueType();
3422 unsigned Opc =
Op.getOpcode();
3436 return LowerToPredicatedOp(
Op, DAG, Opcode);
3439 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3440 return LowerFixedLengthIntToFPToSVE(
Op, DAG);
3444 if (VTSize < InVTSize) {
3452 if (VTSize > InVTSize) {
3464 if (
Op.getValueType().isVector())
3465 return LowerVectorINT_TO_FP(
Op, DAG);
3467 bool IsStrict =
Op->isStrictFPOpcode();
3468 SDValue SrcVal =
Op.getOperand(IsStrict ? 1 : 0);
3473 assert(!IsStrict &&
"Lowering of strict fp16 not yet implemented");
3498 EVT ArgVT =
Arg.getValueType();
3506 Entry.IsSExt =
false;
3507 Entry.IsZExt =
false;
3508 Args.push_back(Entry);
3511 : RTLIB::SINCOS_STRET_F32;
3522 std::pair<SDValue, SDValue> CallResult =
LowerCallTo(CLI);
3523 return CallResult.first;
3530 EVT OpVT =
Op.getValueType();
3531 EVT ArgVT =
Op.getOperand(0).getValueType();
3533 if (useSVEForFixedLengthVectorVT(OpVT))
3534 return LowerFixedLengthBitcastToSVE(
Op, DAG);
3539 "Expected int->fp bitcast!");
3543 return getSVESafeBitCast(OpVT, ExtResult, DAG);
3545 return getSVESafeBitCast(OpVT,
Op.getOperand(0), DAG);
3569 switch (OrigSimpleTy) {
3582 unsigned ExtOpcode) {
3598 EVT VT =
N->getValueType(0);
3603 for (
const SDValue &Elt :
N->op_values()) {
3606 unsigned HalfSize = EltSize / 2;
3608 if (!
isIntN(HalfSize,
C->getSExtValue()))
3611 if (!
isUIntN(HalfSize,
C->getZExtValue()))
3626 N->getOperand(0)->getValueType(0),
3631 EVT VT =
N->getValueType(0);
3637 for (
unsigned i = 0;
i != NumElts; ++
i) {
3639 const APInt &CInt =
C->getAPIntValue();
3660 unsigned Opcode =
N->getOpcode();
3662 SDNode *N0 =
N->getOperand(0).getNode();
3663 SDNode *N1 =
N->getOperand(1).getNode();
3671 unsigned Opcode =
N->getOpcode();
3673 SDNode *N0 =
N->getOperand(0).getNode();
3674 SDNode *N1 =
N->getOperand(1).getNode();
3749 EVT VT =
Op.getValueType();
3754 if (VT.
isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3760 "unexpected type for custom-lowering ISD::MUL");
3761 SDNode *N0 =
Op.getOperand(0).getNode();
3762 SDNode *N1 =
Op.getOperand(1).getNode();
3763 unsigned NewOpc = 0;
3767 if (isN0SExt && isN1SExt)
3772 if (isN0ZExt && isN1ZExt)
3774 else if (isN1SExt || isN1ZExt) {
3808 "unexpected types for extended operands to VMULL");
3809 return DAG.
getNode(NewOpc,
DL, VT, Op0, Op1);
3832 EVT OutVT =
Op.getValueType();
3863 unsigned IntNo = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
3867 case Intrinsic::thread_pointer: {
3871 case Intrinsic::aarch64_neon_abs: {
3872 EVT Ty =
Op.getValueType();
3884 case Intrinsic::aarch64_neon_smax:
3886 Op.getOperand(1),
Op.getOperand(2));
3887 case Intrinsic::aarch64_neon_umax:
3889 Op.getOperand(1),
Op.getOperand(2));
3890 case Intrinsic::aarch64_neon_smin:
3892 Op.getOperand(1),
Op.getOperand(2));
3893 case Intrinsic::aarch64_neon_umin:
3895 Op.getOperand(1),
Op.getOperand(2));
3897 case Intrinsic::aarch64_sve_sunpkhi:
3900 case Intrinsic::aarch64_sve_sunpklo:
3903 case Intrinsic::aarch64_sve_uunpkhi:
3906 case Intrinsic::aarch64_sve_uunpklo:
3909 case Intrinsic::aarch64_sve_clasta_n:
3911 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3912 case Intrinsic::aarch64_sve_clastb_n:
3914 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3915 case Intrinsic::aarch64_sve_lasta:
3917 Op.getOperand(1),
Op.getOperand(2));
3918 case Intrinsic::aarch64_sve_lastb:
3920 Op.getOperand(1),
Op.getOperand(2));
3921 case Intrinsic::aarch64_sve_rev:
3924 case Intrinsic::aarch64_sve_tbl:
3926 Op.getOperand(1),
Op.getOperand(2));
3927 case Intrinsic::aarch64_sve_trn1:
3929 Op.getOperand(1),
Op.getOperand(2));
3930 case Intrinsic::aarch64_sve_trn2:
3932 Op.getOperand(1),
Op.getOperand(2));
3933 case Intrinsic::aarch64_sve_uzp1:
3935 Op.getOperand(1),
Op.getOperand(2));
3936 case Intrinsic::aarch64_sve_uzp2:
3938 Op.getOperand(1),
Op.getOperand(2));
3939 case Intrinsic::aarch64_sve_zip1:
3941 Op.getOperand(1),
Op.getOperand(2));
3942 case Intrinsic::aarch64_sve_zip2:
3944 Op.getOperand(1),
Op.getOperand(2));
3945 case Intrinsic::aarch64_sve_splice:
3947 Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
3948 case Intrinsic::aarch64_sve_ptrue:
3951 case Intrinsic::aarch64_sve_clz:
3953 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3954 case Intrinsic::aarch64_sve_cnt: {
3957 if (
Data.getValueType().isFloatingPoint())
3960 Op.getOperand(2),
Data,
Op.getOperand(1));
3962 case Intrinsic::aarch64_sve_dupq_lane:
3963 return LowerDUPQLane(
Op, DAG);
3964 case Intrinsic::aarch64_sve_convert_from_svbool:
3967 case Intrinsic::aarch64_sve_convert_to_svbool:
3969 case Intrinsic::aarch64_sve_fneg:
3971 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3972 case Intrinsic::aarch64_sve_frintp:
3974 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3975 case Intrinsic::aarch64_sve_frintm:
3977 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3978 case Intrinsic::aarch64_sve_frinti:
3980 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3981 case Intrinsic::aarch64_sve_frintx:
3983 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3984 case Intrinsic::aarch64_sve_frinta:
3986 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3987 case Intrinsic::aarch64_sve_frintn:
3989 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3990 case Intrinsic::aarch64_sve_frintz:
3992 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
3993 case Intrinsic::aarch64_sve_ucvtf:
3995 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
3997 case Intrinsic::aarch64_sve_scvtf:
3999 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
4001 case Intrinsic::aarch64_sve_fcvtzu:
4003 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
4005 case Intrinsic::aarch64_sve_fcvtzs:
4007 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
4009 case Intrinsic::aarch64_sve_fsqrt:
4011 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
4012 case Intrinsic::aarch64_sve_frecpx:
4014 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
4015 case Intrinsic::aarch64_sve_fabs:
4017 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
4018 case Intrinsic::aarch64_sve_abs:
4020 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
4021 case Intrinsic::aarch64_sve_neg:
4023 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
4024 case Intrinsic::aarch64_sve_insr: {
4033 case Intrinsic::aarch64_sve_rbit:
4035 Op.getValueType(),
Op.getOperand(2),
Op.getOperand(3),
4037 case Intrinsic::aarch64_sve_revb:
4039 Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(1));
4040 case Intrinsic::aarch64_sve_sxtb:
4043 Op.getOperand(2),
Op.getOperand(3),
4046 case Intrinsic::aarch64_sve_sxth:
4049 Op.getOperand(2),
Op.getOperand(3),
4052 case Intrinsic::aarch64_sve_sxtw:
4055 Op.getOperand(2),
Op.getOperand(3),
4058 case Intrinsic::aarch64_sve_uxtb:
4061 Op.getOperand(2),
Op.getOperand(3),
4064 case Intrinsic::aarch64_sve_uxth:
4067 Op.getOperand(2),
Op.getOperand(3),
4070 case Intrinsic::aarch64_sve_uxtw:
4073 Op.getOperand(2),
Op.getOperand(3),
4077 case Intrinsic::localaddress: {
4080 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4082 Op.getSimpleValueType());
4085 case Intrinsic::eh_recoverfp: {
4090 SDValue IncomingFPOp =
Op.getOperand(2);
4092 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->
getGlobal() :
nullptr);
4095 "llvm.eh.recoverfp must take a function as the first argument");
4096 return IncomingFPOp;
4099 case Intrinsic::aarch64_neon_vsri:
4100 case Intrinsic::aarch64_neon_vsli: {
4101 EVT Ty =
Op.getValueType();
4108 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4110 return DAG.
getNode(Opcode, dl, Ty,
Op.getOperand(1),
Op.getOperand(2),
4114 case Intrinsic::aarch64_neon_srhadd:
4115 case Intrinsic::aarch64_neon_urhadd:
4116 case Intrinsic::aarch64_neon_shadd:
4117 case Intrinsic::aarch64_neon_uhadd: {
4118 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4119 IntNo == Intrinsic::aarch64_neon_shadd);
4120 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4121 IntNo == Intrinsic::aarch64_neon_urhadd);
4125 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
4128 case Intrinsic::aarch64_neon_sabd:
4129 case Intrinsic::aarch64_neon_uabd: {
4130 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ?
ISD::ABDU
4132 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
4135 case Intrinsic::aarch64_neon_uaddlp: {
4137 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1));
4139 case Intrinsic::aarch64_neon_sdot:
4140 case Intrinsic::aarch64_neon_udot:
4141 case Intrinsic::aarch64_sve_sdot:
4142 case Intrinsic::aarch64_sve_udot: {
4143 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4144 IntNo == Intrinsic::aarch64_sve_udot)
4147 return DAG.
getNode(Opcode, dl,
Op.getValueType(),
Op.getOperand(1),
4148 Op.getOperand(2),
Op.getOperand(3));
4153 bool AArch64TargetLowering::shouldExtendGSIndex(
EVT VT,
EVT &EltTy)
const {
4162 bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(
EVT VT)
const {
4171 bool AArch64TargetLowering::isVectorLoadExtDesirable(
SDValue ExtVal)
const {
4176 std::map<std::tuple<bool, bool, bool>,
unsigned> AddrModes = {
4177 {std::make_tuple(
false,
false,
false),
4179 {std::make_tuple(
false,
false,
true),
4181 {std::make_tuple(
false,
true,
false),
4183 {std::make_tuple(
false,
true,
true),
4185 {std::make_tuple(
true,
false,
false),
4187 {std::make_tuple(
true,
false,
true),
4189 {std::make_tuple(
true,
true,
false),
4191 {std::make_tuple(
true,
true,
true),
4194 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4195 return AddrModes.find(
Key)->second;
4199 std::map<std::tuple<bool, bool, bool>,
unsigned> AddrModes = {
4200 {std::make_tuple(
false,
false,
false),
4202 {std::make_tuple(
false,
false,
true),
4204 {std::make_tuple(
false,
true,
false),
4206 {std::make_tuple(
false,
true,
true),
4208 {std::make_tuple(
true,
false,
false),
4210 {std::make_tuple(
true,
false,
true),
4212 {std::make_tuple(
true,
true,
false),
4214 {std::make_tuple(
true,
true,
true),
4217 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4218 return AddrModes.find(
Key)->second;
4244 unsigned Opcode =
Index.getOpcode();
4253 if (!
Mask ||
Mask->getZExtValue() != 0xFFFFFFFF)
4271 unsigned &Opcode,
bool IsGather,
4285 if (isa<ConstantSDNode>(SplatVal))
4286 Offset = cast<ConstantSDNode>(SplatVal);
4303 uint64_t OffsetVal =
Offset->getZExtValue();
4307 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4309 BasePtr = ConstOffset;
4316 BasePtr =
Index->getOperand(0);
4317 Index = ConstOffset;
4324 assert(MGT &&
"Can only custom lower gather load nodes");
4340 bool IdxNeedsExtend =
4342 Index.getSimpleValueType().getVectorElementType() ==
MVT::i32;
4346 EVT IndexVT =
Index.getSimpleValueType();
4354 if (IsFixedLength) {
4356 "Cannot lower when not using SVE for fixed vectors");
4377 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4391 if (ResNeedsSignExtend)
4394 if (IsFixedLength) {
4395 if (
Index.getSimpleValueType().isFixedLengthVector())
4397 if (
BasePtr.getSimpleValueType().isFixedLengthVector())
4404 Chain =
Result.getValue(1);
4406 if (IsFixedLength) {
4420 Result = getSVESafeBitCast(VT, Result, DAG);
4430 assert(MSC &&
"Can only custom lower scatter store nodes");
4447 Index.getSimpleValueType().getVectorElementType() ==
MVT::i32;
4450 EVT IndexVT =
Index.getSimpleValueType();
4459 if (IsFixedLength) {
4461 "Cannot lower when not using SVE for fixed vectors");
4483 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4494 if (IsFixedLength) {
4495 if (
Index.getSimpleValueType().isFixedLengthVector())
4497 if (
BasePtr.getSimpleValueType().isFixedLengthVector())
4503 return DAG.
getNode(Opcode,
DL, VTs, Ops);
4509 assert(LoadNode &&
"Expected custom lowering of a masked load node");
4510 EVT VT =
Op->getValueType(0);
4512 if (useSVEForFixedLengthVectorVT(VT,
true))
4513 return LowerFixedLengthVectorMLoadToSVE(
Op, DAG);
4561 ST->getBasePtr(),
ST->getMemOperand());
4571 assert (StoreNode &&
"Can only custom lower store nodes");
4579 if (useSVEForFixedLengthVectorVT(VT,
true))
4580 return LowerFixedLengthVectorStoreToSVE(
Op, DAG);
4616 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4630 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4638 EVT PtrVT =
Base.getValueType();
4639 for (
unsigned i = 0;
i < 8;
i++) {
4657 assert(LoadNode &&
"Expected custom lowering of a load node");
4663 EVT PtrVT =
Base.getValueType();
4664 for (
unsigned i = 0;
i < 8;
i++) {
4670 Ops.push_back(Part);
4678 EVT VT =
Op->getValueType(0);
4708 MVT VT =
Op.getSimpleValueType();
4730 switch (
Op.getOpcode()) {
4735 return LowerBITCAST(
Op, DAG);
4737 return LowerGlobalAddress(
Op, DAG);
4739 return LowerGlobalTLSAddress(
Op, DAG);
4743 return LowerSETCC(
Op, DAG);
4745 return LowerBR_CC(
Op, DAG);
4747 return LowerSELECT(
Op, DAG);
4749 return LowerSELECT_CC(
Op, DAG);
4751 return LowerJumpTable(
Op, DAG);
4753 return LowerBR_JT(
Op, DAG);
4755 return LowerConstantPool(
Op, DAG);
4757 return LowerBlockAddress(
Op, DAG);
4759 return LowerVASTART(
Op, DAG);
4761 return LowerVACOPY(
Op, DAG);
4763 return LowerVAARG(
Op, DAG);
4808 return LowerFP_ROUND(
Op, DAG);
4810 return LowerFP_EXTEND(
Op, DAG);
4812 return LowerFRAMEADDR(
Op, DAG);
4814 return LowerSPONENTRY(
Op, DAG);
4816 return LowerRETURNADDR(
Op, DAG);
4818 return LowerADDROFRETURNADDR(
Op, DAG);
4820 return LowerCONCAT_VECTORS(
Op, DAG);
4822 return LowerINSERT_VECTOR_ELT(
Op, DAG);
4824 return LowerEXTRACT_VECTOR_ELT(
Op, DAG);
4826 return LowerBUILD_VECTOR(
Op, DAG);
4828 return LowerVECTOR_SHUFFLE(
Op, DAG);
4830 return LowerSPLAT_VECTOR(
Op, DAG);
4832 return LowerEXTRACT_SUBVECTOR(
Op, DAG);
4834 return LowerINSERT_SUBVECTOR(
Op, DAG);
4837 return LowerDIV(
Op, DAG);
4853 return LowerVectorSRA_SRL_SHL(
Op, DAG);
4857 return LowerShiftParts(
Op, DAG);
4859 return LowerCTPOP(
Op, DAG);
4861 return LowerFCOPYSIGN(
Op, DAG);
4863 return LowerVectorOR(
Op, DAG);
4865 return LowerXOR(
Op, DAG);
4872 return LowerINT_TO_FP(
Op, DAG);
4877 return LowerFP_TO_INT(
Op, DAG);
4880 return LowerFP_TO_INT_SAT(
Op, DAG);
4882 return LowerFSINCOS(
Op, DAG);
4884 return LowerFLT_ROUNDS_(
Op, DAG);
4886 return LowerSET_ROUNDING(
Op, DAG);
4888 return LowerMUL(
Op, DAG);
4896 return LowerINTRINSIC_WO_CHAIN(
Op, DAG);
4898 return LowerSTORE(
Op, DAG);
4900 return LowerFixedLengthVectorMStoreToSVE(
Op, DAG);
4902 return LowerMGATHER(
Op, DAG);
4904 return LowerMSCATTER(
Op, DAG);
4906 return LowerVECREDUCE_SEQ_FADD(
Op, DAG);
4918 return LowerVECREDUCE(
Op, DAG);
4920 return LowerATOMIC_LOAD_SUB(
Op, DAG);
4922 return LowerATOMIC_LOAD_AND(
Op, DAG);
4924 return LowerDYNAMIC_STACKALLOC(
Op, DAG);
4926 return LowerVSCALE(
Op, DAG);
4930 return LowerFixedLengthVectorIntExtendToSVE(
Op, DAG);
4933 EVT ExtraVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
4939 return LowerToPredicatedOp(
Op, DAG,
4943 return LowerTRUNCATE(
Op, DAG);
4945 return LowerMLOAD(
Op, DAG);
4947 if (useSVEForFixedLengthVectorVT(
Op.getValueType()))
4948 return LowerFixedLengthVectorLoadToSVE(
Op, DAG);
4949 return LowerLOAD(
Op, DAG);
4953 return LowerToScalableOp(
Op, DAG);
4965 return LowerFixedLengthVectorSelectToSVE(
Op, DAG);
4967 return LowerABS(
Op, DAG);
4969 return LowerBitreverse(
Op, DAG);
4976 return LowerCTTZ(
Op, DAG);
4978 return LowerVECTOR_SPLICE(
Op, DAG);
4986 bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
4987 EVT VT,
bool OverrideNEON)
const {
5037 bool IsVarArg)
const {
5076 SDValue AArch64TargetLowering::LowerFormalArguments(
5096 unsigned NumArgs =
Ins.size();
5098 unsigned CurArgIdx = 0;
5099 for (
unsigned i = 0;
i != NumArgs; ++
i) {
5101 if (
Ins[
i].isOrigArg()) {
5102 std::advance(CurOrigArg,
Ins[
i].getOrigArgIndex() - CurArgIdx);
5103 CurArgIdx =
Ins[
i].getOrigArgIndex();
5115 bool UseVarArgCC =
false;
5117 UseVarArgCC = isVarArg;
5121 assert(!Res &&
"Call operand has unhandled type");
5125 unsigned ExtraArgLocs = 0;
5126 for (
unsigned i = 0,
e =
Ins.size();
i !=
e; ++
i) {
5129 if (
Ins[
i].Flags.isByVal()) {
5133 int Size =
Ins[
i].Flags.getByValSize();
5134 unsigned NumRegs = (
Size + 7) / 8;
5141 InVals.push_back(FrameIdxN);
5146 if (
Ins[
i].Flags.isSwiftAsync())
5156 RC = &AArch64::GPR32RegClass;
5158 RC = &AArch64::GPR64RegClass;
5160 RC = &AArch64::FPR16RegClass;
5162 RC = &AArch64::FPR32RegClass;
5164 RC = &AArch64::FPR64RegClass;
5166 RC = &AArch64::FPR128RegClass;
5169 RC = &AArch64::PPRRegClass;
5171 RC = &AArch64::ZPRRegClass;
5189 "Only scalable vectors can be passed indirectly");
5213 !
Ins[
i].Flags.isInConsecutiveRegs())
5214 BEAlign = 8 - ArgSize;
5234 "Only scalable vectors can be passed indirectly");
5256 "Only scalable vectors can be passed indirectly");
5259 unsigned NumParts = 1;
5260 if (
Ins[
i].Flags.isInConsecutiveRegs()) {
5261 assert(!
Ins[
i].Flags.isInConsecutiveRegsLast());
5262 while (!
Ins[
i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5271 while (NumParts > 0) {
5273 InVals.push_back(ArgValue);
5282 BytesIncrement, Flags);
5291 InVals.push_back(ArgValue);
5294 assert((ArgLocs.size() + ExtraArgLocs) ==
Ins.size());
5305 saveVarArgRegisters(CCInfo, DAG,
DL, Chain);
5309 unsigned StackOffset = CCInfo.getNextStackOffset();
5321 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5325 if (!CCInfo.isAllocated(AArch64::X8)) {
5326 unsigned X8VReg = MF.
addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5336 for (
unsigned I = 0,
E =
Ins.size();
I !=
E; ++
I) {
5337 if (
Ins[
I].Flags.isInReg()) {
5352 unsigned StackArgSize = CCInfo.getNextStackOffset();
5354 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5358 StackArgSize =
alignTo(StackArgSize, 16);
5378 void AArch64TargetLowering::saveVarArgRegisters(
CCState &CCInfo,
5391 AArch64::X3, AArch64::X4, AArch64::X5,
5392 AArch64::X6, AArch64::X7 };
5396 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5398 if (GPRSaveSize != 0) {
5401 if (GPRSaveSize & 15)
5409 for (
unsigned i = FirstVariadicGPR;
i < NumGPRArgRegs; ++
i) {
5417 (
i - FirstVariadicGPR) * 8)
5419 MemOps.push_back(
Store);
5429 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5430 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5431 static const unsigned NumFPRArgRegs =
array_lengthof(FPRArgRegs);
5434 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5436 if (FPRSaveSize != 0) {
5441 for (
unsigned i = FirstVariadicFPR;
i < NumFPRArgRegs; ++
i) {
5442 unsigned VReg = MF.
addLiveIn(FPRArgRegs[
i], &AArch64::FPR128RegClass);
5448 MemOps.push_back(
Store);
5457 if (!MemOps.empty()) {
5464 SDValue AArch64TargetLowering::LowerCallResult(
5478 for (
unsigned i = 0;
i != RVLocs.size(); ++
i) {
5483 if (
i == 0 && isThisReturn) {
5485 "unexpected return calling convention register assignment");
5486 InVals.push_back(ThisVal);
5520 InVals.push_back(Val);
5548 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5568 bool CCMatch = CallerCC == CalleeCC;
5583 if (
i->hasByValAttr())
5592 if (
i->hasInRegAttr())
5621 "Unexpected variadic calling convention");
5624 if (isVarArg && !Outs.empty()) {
5632 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs,
C);
5636 if (!ArgLoc.isRegLoc())
5651 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5652 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5663 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs,
C);
5675 A.getValVT().isScalableVector()) &&
5676 "Expected value to be scalable");
5693 SDValue AArch64TargetLowering::addTokenForArgument(
SDValue Chain,
5696 int ClobberedFI)
const {
5699 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
5704 ArgChains.push_back(Chain);
5710 if (
LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5712 if (FI->getIndex() < 0) {
5714 int64_t InLastByte = InFirstByte;
5717 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5718 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5719 ArgChains.push_back(
SDValue(L, 1));
5726 bool AArch64TargetLowering::DoesCalleeRestoreStack(
CallingConv::ID CallCC,
5727 bool TailCallOpt)
const {
5735 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5744 bool &IsTailCall = CLI.IsTailCall;
5746 bool IsVarArg = CLI.IsVarArg;
5750 bool IsThisReturn =
false;
5754 bool IsSibCall =
false;
5764 return In.VT.isScalableVector();
5767 if (CalleeInSVE || CalleeOutSVE)
5773 IsTailCall = isEligibleForTailCallOptimization(
5774 Callee, CallConv, IsVarArg, Outs, OutVals,
Ins, DAG);
5786 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5788 "site marked musttail");
5798 unsigned NumArgs = Outs.size();
5800 for (
unsigned i = 0;
i != NumArgs; ++
i) {
5801 MVT ArgVT = Outs[
i].VT;
5804 "currently not supported");
5807 bool UseVarArgCC = !Outs[
i].IsFixed;
5814 assert(!Res &&
"Call operand has unhandled type");
5824 unsigned NumArgs = Outs.size();
5825 for (
unsigned i = 0;
i != NumArgs; ++
i) {
5826 MVT ValVT = Outs[
i].VT;
5829 CLI.getArgs()[Outs[
i].OrigArgIndex].Ty,
5841 assert(!Res &&
"Call operand has unhandled type");
5862 if (IsTailCall && !IsSibCall) {
5867 NumBytes =
alignTo(NumBytes, 16);
5872 FPDiff = NumReusableBytes - NumBytes;
5876 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (
unsigned)-FPDiff)
5884 assert(FPDiff % 16 == 0 &&
"unaligned stack on tail call");
5900 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5902 for (
const auto &
F : Forwards) {
5909 unsigned ExtraArgLocs = 0;
5910 for (
unsigned i = 0,
e = Outs.size();
i !=
e; ++
i) {
5952 "Only scalable vectors can be passed indirectly");
5955 uint64_t PartSize = StoreSize;
5956 unsigned NumParts = 1;
5961 StoreSize *= NumParts;
5979 Chain = DAG.
getStore(Chain,
DL, OutVals[
i], Ptr, MPI);
5990 BytesIncrement, Flags);
6004 "unexpected calling convention register assignment");
6006 "unexpected use of 'returned'");
6007 IsThisReturn =
true;
6016 [=](
const std::pair<unsigned, SDValue> &Elt) {
6050 OpSize = (OpSize + 7) / 8;
6054 BEAlign = 8 - OpSize;
6057 int32_t
Offset = LocMemOffset + BEAlign;
6072 Chain = addTokenForArgument(Chain, DAG, MF.
getFrameInfo(), FI);
6085 Chain,
DL, DstAddr,
Arg, SizeNode,
6090 MemOpChains.push_back(Cpy);
6100 MemOpChains.push_back(
Store);
6105 if (!MemOpChains.empty())
6111 for (
auto &RegToPass : RegsToPass) {
6113 RegToPass.second, InFlag);
6120 if (
auto *
G = dyn_cast<GlobalAddressSDNode>(
Callee)) {
6121 auto GV =
G->getGlobal();
6131 }
else if (
auto *
S = dyn_cast<ExternalSymbolSDNode>(
Callee)) {
6134 const char *Sym =
S->getSymbol();
6138 const char *Sym =
S->getSymbol();
6147 if (IsTailCall && !IsSibCall) {
6153 std::vector<SDValue> Ops;
6154 Ops.push_back(Chain);
6166 for (
auto &RegToPass : RegsToPass)
6168 RegToPass.second.getValueType()));
6175 Mask =
TRI->getThisReturnPreservedMask(MF, CallConv);
6177 IsThisReturn =
false;
6184 TRI->UpdateCustomCallPreservedMask(MF, &
Mask);
6186 if (
TRI->isAnyArgRegReserved(MF))
6187 TRI->emitReservedArgRegCallError(MF);
6189 assert(
Mask &&
"Missing call preserved mask for calling convention");
6193 Ops.push_back(InFlag);
6212 "tail calls cannot be marked with clang.arc.attachedcall");
6217 Chain = DAG.
getNode(CallOpc,
DL, NodeTys, Ops);
6222 uint64_t CalleePopBytes =
6223 DoesCalleeRestoreStack(CallConv, TailCallOpt) ?
alignTo(NumBytes, 16) : 0;
6233 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg,
Ins,
DL, DAG,
6234 InVals, IsThisReturn,
6235 IsThisReturn ? OutVals[0] :
SDValue());
6238 bool AArch64TargetLowering::CanLowerReturn(
6266 for (
unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
6267 ++
i, ++realRVLocIdx) {
6301 llvm::find_if(RetVals, [=](
const std::pair<unsigned, SDValue> &Elt) {
6312 for (
auto &RetVal : RetVals) {
6316 DAG.
getRegister(RetVal.first, RetVal.second.getValueType()));
6327 unsigned RetValReg = AArch64::X0;
6340 if (AArch64::GPR64RegClass.
contains(*
I))
6342 else if (AArch64::FPR64RegClass.
contains(*
I))
6353 RetOps.push_back(
Flag);
6364 unsigned Flag)
const {
6366 N->getOffset(),
Flag);
6371 unsigned Flag)
const {
6377 unsigned Flag)
const {
6379 N->getOffset(),
Flag);
6384 unsigned Flag)
const {
6389 template <
class NodeTy>
6391 unsigned Flags)
const {
6402 template <
class NodeTy>
6404 unsigned Flags)
const {
6418 template <
class NodeTy>
6420 unsigned Flags)
const {
6432 template <
class NodeTy>
6434 unsigned Flags)
const {
6438 SDValue Sym = getTargetNode(
N, Ty, DAG, Flags);
6450 "unexpected offset in global node");
6455 return getGOT(GN, DAG, OpFlags);
6460 Result = getAddrLarge(GN, DAG, OpFlags);
6462 Result = getAddrTiny(GN, DAG, OpFlags);
6464 Result = getAddr(GN, DAG, OpFlags);
6503 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(
SDValue Op,
6506 "This function expects a Darwin target");
6511 const GlobalValue *GV = cast<GlobalAddressSDNode>(
Op)->getGlobal();
6521 PtrMemVT,
DL, Chain, DescAddr,
6661 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(
SDValue SymAddr,
6677 AArch64TargetLowering::LowerELFGlobalTLSAddress(
SDValue Op,
6693 "in local exec TLS model");
6709 return LowerELFTLSLocalExec(GV, ThreadBase,
DL, DAG);
6732 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
6756 TPOff = LowerELFTLSDescCallSeq(SymAddr,
DL, DAG);
6764 AArch64TargetLowering::LowerWindowsGlobalTLSAddress(
SDValue Op,
6829 return LowerDarwinGlobalTLSAddress(
Op, DAG);
6831 return LowerELFGlobalTLSAddress(
Op, DAG);
6833 return LowerWindowsGlobalTLSAddress(
Op, DAG);
6844 cast<VTSDNode>(Val.
getOperand(1))->getVT().getFixedSizeInBits() -
6866 bool ProduceNonFlagSettingCondBr =
6911 if (RHSC && RHSC->
getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
6948 uint64_t SignBitPos;
6959 uint64_t SignBitPos;
6993 EVT VT =
Op.getValueType();
7002 else if (SrcVT.
bitsGT(VT))
7009 auto setVecVal = [&] (
int Idx) {
7023 EltMask = 0x80000000ULL;
7024 setVecVal(AArch64::ssub);
7033 setVecVal(AArch64::dsub);
7036 EltMask = 0x8000ULL;
7037 setVecVal(AArch64::hsub);
7067 Attribute::NoImplicitFloat))
7083 EVT VT =
Op.getValueType();
7114 "Unexpected type for custom ctpop lowering");
7121 unsigned EltSize = 8;
7136 EVT VT =
Op.getValueType();
7138 useSVEForFixedLengthVectorVT(VT,
true));
7147 EVT VT =
Op.getValueType();
7150 useSVEForFixedLengthVectorVT(VT,
true))
7197 if (
Op.getValueType().isVector())
7198 return LowerVSETCC(
Op, DAG);
7200 bool IsStrict =
Op->isStrictFPOpcode();
7202 unsigned OpNo = IsStrict ? 1 : 0;
7205 Chain =
Op.getOperand(0);
7208 ISD::CondCode CC = cast<CondCodeSDNode>(
Op.getOperand(OpNo + 2))->get();
7212 EVT VT =
Op.getValueType();
7225 "Unexpected setcc expansion!");
7355 }
else if (CTVal && CFVal) {
7377 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
7380 if (TrueVal32 > FalseVal32) {
7420 else if (CFVal && CFVal == RHSVal && AArch64CC ==
AArch64CC::NE)
7423 assert (CTVal && CFVal &&
"Expected constant operands for CSNEG.");
7437 return DAG.
getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
7456 if (RHSVal && RHSVal->
isZero()) {
7464 CFVal && CFVal->
isZero() &&
7488 EVT Ty =
Op.getValueType();
7489 auto Idx =
Op.getConstantOperandAPInt(2);
7503 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal,
DL, DAG);
7513 EVT Ty =
Op.getValueType();
7521 if (useSVEForFixedLengthVectorVT(Ty)) {
7554 CC = cast<CondCodeSDNode>(CCVal.
getOperand(2))->get();
7560 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal,
DL, DAG);
7571 return getAddrLarge(
JT, DAG);
7573 return getAddrTiny(
JT, DAG);
7575 return getAddr(
JT, DAG);
7585 int JTI = cast<JumpTableSDNode>(
JT.getNode())->getIndex();
7604 return getGOT(
CP, DAG);
7606 return getAddrLarge(
CP, DAG);
7608 return getAddrTiny(
CP, DAG);
7610 return getAddr(
CP, DAG);
7619 return getAddrLarge(BA, DAG);
7621 return getAddrTiny(BA, DAG);
7623 return getAddr(BA, DAG);
7635 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
7650 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
7668 const Value *SV = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
7675 MemOps.push_back(DAG.
getStore(Chain,
DL, Stack, VAList,
7692 MemOps.push_back(DAG.
getStore(Chain,
DL, GRTop, GRTopAddr,
7710 MemOps.push_back(DAG.
getStore(Chain,
DL, VRTop, VRTopAddr,
7739 return LowerWin64_VASTART(
Op, DAG);
7741 return LowerDarwin_VASTART(
Op, DAG);
7743 return LowerAAPCS_VASTART(
Op, DAG);
7752 unsigned VaListSize =
7756 const Value *DestSV = cast<SrcValueSDNode>(
Op.getOperand(3))->getValue();
7757 const Value *SrcSV = cast<SrcValueSDNode>(
Op.getOperand(4))->getValue();
7761 Align(PtrSize),
false,
false,
false,
7767 "automatic va_arg instruction only works on Darwin");
7769 const Value *V = cast<SrcValueSDNode>(
Op.getOperand(2))->getValue();
7770 EVT VT =
Op.getValueType();
7785 "currently not supported");
7802 ArgSize =
std::max(ArgSize, MinSlotSize);
7803 bool NeedFPTrunc =
false;
7839 EVT VT =
Op.getValueType();
7841 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
7865 #define GET_REGISTER_MATCHER
7866 #include "AArch64GenAsmMatcher.inc"
7873 if (AArch64::X1 <=
Reg &&
Reg <= AArch64::X28) {
7875 unsigned DwarfRegNum =
MRI->getDwarfRegNum(
Reg,
false);
7889 EVT VT =
Op.getValueType();
7905 EVT VT =
Op.getValueType();
7907 unsigned Depth = cast<ConstantSDNode>(
Op.getOperand(0))->getZExtValue();
7910 SDValue FrameAddr = LowerFRAMEADDR(
Op, DAG);
7918 unsigned Reg = MF.
addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
7955 bool OptForSize)
const {
7956 bool IsLegal =
false;
7982 unsigned Limit = (OptForSize ? 1 : (Subtarget->
hasFuseLiterals() ? 5 : 2));
7983 IsLegal = Insn.size() <= Limit;
7987 <<
" imm value: "; Imm.
dump(););
7999 if (
ST->hasNEON() &&
8003 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
8011 return DAG.
getNode(Opcode,
SDLoc(Operand), VT, Operand);
8021 EVT VT =
Op.getValueType();
8028 AArch64TargetLowering::getSqrtResultForDenormInput(
SDValue Op,
8037 bool Reciprocal)
const {
8039 (
Enabled == ReciprocalEstimate::Unspecified && Subtarget->
useRSqrt()))
8050 for (
int i = ExtraSteps;
i > 0; --
i) {
8068 int &ExtraSteps)
const {
8080 for (
int i = ExtraSteps;
i > 0; --
i) {
8120 const char *AArch64TargetLowering::LowerXConstraint(
EVT ConstraintVT)
const {
8150 if (Constraint ==
"Upa")
8152 if (Constraint ==
"Upl")
8160 AArch64TargetLowering::getConstraintType(
StringRef Constraint)
const {
8161 if (Constraint.
size() == 1) {
8162 switch (Constraint[0]) {
8196 AArch64TargetLowering::getSingleConstraintMatchWeight(
8197 AsmOperandInfo &
info,
const char *constraint)
const {
8199 Value *CallOperandVal =
info.CallOperandVal;
8202 if (!CallOperandVal)
8206 switch (*constraint) {
8213 if (
type->isFloatingPointTy() ||
type->isVectorTy())
8227 std::pair<unsigned, const TargetRegisterClass *>
8228 AArch64TargetLowering::getRegForInlineAsmConstraint(
8230 if (Constraint.
size() == 1) {
8231 switch (Constraint[0]) {
8234 return std::make_pair(0U,
nullptr);
8236 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
8238 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
8239 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
8245 return std::make_pair(0U, &AArch64::ZPRRegClass);
8246 return std::make_pair(0U,
nullptr);
8250 return std::make_pair(0U, &AArch64::FPR16RegClass);
8252 return std::make_pair(0U, &AArch64::FPR32RegClass);
8254 return std::make_pair(0U, &AArch64::FPR64RegClass);
8256 return std::make_pair(0U, &AArch64::FPR128RegClass);
8265 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
8267 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
8273 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
8280 return std::make_pair(0U,
nullptr);
8282 return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
8283 :
std::make_pair(0U, &AArch64::PPRRegClass);
8286 if (
StringRef(
"{cc}").equals_insensitive(Constraint))
8287 return std::make_pair(
unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
8291 std::pair<unsigned, const TargetRegisterClass *> Res;
8297 if ((
Size == 4 ||
Size == 5) && Constraint[0] ==
'{' &&
8298 tolower(Constraint[1]) ==
'v' && Constraint[
Size - 1] ==
'}') {
8301 if (!
Failed && RegNo >= 0 && RegNo <= 31) {
8306 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
8307 Res.second = &AArch64::FPR64RegClass;
8309 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
8310 Res.second = &AArch64::FPR128RegClass;
8316 if (Res.second && !Subtarget->
hasFPARMv8() &&
8317 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
8318 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
8319 return std::make_pair(0U,
nullptr);
8326 bool AllowUnknown)
const {
8335 void AArch64TargetLowering::LowerAsmOperandForConstraint(
8336 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
8341 if (Constraint.length() != 1)
8344 char ConstraintLetter = Constraint[0];
8345 switch (ConstraintLetter) {
8368 dyn_cast<BlockAddressSDNode>(
Op)) {
8387 uint64_t CVal =
C->getZExtValue();
8388 switch (ConstraintLetter) {
8396 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
8400 uint64_t NVal = -
C->getSExtValue();
8401 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
8402 CVal =
C->getSExtValue();
8433 if ((CVal & 0xFFFF) == CVal)
8435 if ((CVal & 0xFFFF0000ULL) == CVal)
8438 if ((NCVal & 0xFFFFULL) == NCVal)
8440 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8447 if ((CVal & 0xFFFFULL) == CVal)
8449 if ((CVal & 0xFFFF0000ULL) == CVal)
8451 if ((CVal & 0xFFFF00000000ULL) == CVal)
8453 if ((CVal & 0xFFFF000000000000ULL) == CVal)
8455 uint64_t NCVal = ~CVal;
8456 if ((NCVal & 0xFFFFULL) == NCVal)
8458 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8460 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
8462 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
8476 Ops.push_back(Result);
8524 LLVM_DEBUG(
dbgs() <<
"AArch64TargetLowering::ReconstructShuffle\n");
8526 EVT VT =
Op.getValueType();
8528 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
8531 struct ShuffleSourceInfo {
8546 ShuffleSourceInfo(
SDValue Vec)
8548 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
8556 for (
unsigned i = 0;
i < NumElts; ++
i) {
8563 dbgs() <<
"Reshuffle failed: "
8564 "a shuffle can only come from building a vector from "
8565 "various elements of other vectors, provided their "
8566 "indices are constant\n");
8573 if (
Source == Sources.end())
8574 Source = Sources.
insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8577 unsigned EltNo = cast<ConstantSDNode>(V.
getOperand(1))->getZExtValue();
8582 if (Sources.size() > 2) {
8584 dbgs() <<
"Reshuffle failed: currently only do something sane when at "
8585 "most two source vectors are involved\n");
8592 for (
auto &
Source : Sources) {
8593 EVT SrcEltTy =
Source.Vec.getValueType().getVectorElementType();
8594 if (SrcEltTy.
bitsLT(SmallestEltTy)) {
8595 SmallestEltTy = SrcEltTy;
8598 unsigned ResMultiplier =
8607 for (
auto &Src : Sources) {
8608 EVT SrcVT = Src.ShuffleVec.getValueType();
8611 if (SrcVTSize == VTSize)
8620 if (SrcVTSize < VTSize) {
8621 assert(2 * SrcVTSize == VTSize);
8626 DAG.
getUNDEF(Src.ShuffleVec.getValueType()));
8630 if (SrcVTSize != 2 * VTSize) {
8632 dbgs() <<
"Reshuffle failed: result vector too small to extract\n");
8636 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8638 dbgs() <<
"Reshuffle failed: span too large for a VEXT to cope\n");
8642 if (Src.MinElt >= NumSrcElts) {
8647 Src.WindowBase = -NumSrcElts;
8648 }
else if (Src.MaxElt < NumSrcElts) {
8665 dbgs() <<
"Reshuffle failed: don't know how to lower AArch64ISD::EXT "
8666 "for SVE vectors.");
8673 Src.WindowBase = -Src.MinElt;
8680 for (
auto &Src : Sources) {
8682 if (SrcEltTy == SmallestEltTy)
8688 Src.WindowBase *= Src.WindowScale;
8694 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8701 if (Entry.isUndef())
8704 auto Src =
find(Sources, Entry.getOperand(0));
8705 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8710 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8713 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8717 int *LaneMask = &
Mask[
i * ResMultiplier];
8719 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8720 ExtractBase += NumElts * (Src - Sources.begin());
8721 for (
int j = 0;
j < LanesDefined; ++
j)
8722 LaneMask[
j] = ExtractBase +
j;
8727 LLVM_DEBUG(
dbgs() <<
"Reshuffle failed: illegal shuffle mask\n");
8732 for (
unsigned i = 0;
i < Sources.size(); ++
i)
8740 dbgs() <<
"Reshuffle, creating node: "; V.
dump(););
8759 unsigned ExpectedElt = Imm;
8760 for (
unsigned i = 1;
i < NumElts; ++
i) {
8764 if (ExpectedElt == NumElts)
8769 if (ExpectedElt !=
static_cast<unsigned>(
M[
i]))
8781 unsigned &DupLaneOp) {
8783 "Only possible block sizes for wide DUP are: 16, 32, 64");
8802 for (
size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
8803 for (
size_t I = 0;
I < NumEltsPerBlock;
I++) {
8804 int Elt =
M[BlockIndex * NumEltsPerBlock +
I];
8808 if ((
unsigned)Elt >= SingleVecNumElements)
8810 if (BlockElts[
I] < 0)
8812 else if (BlockElts[
I] != Elt)
8821 auto FirstRealEltIter =
find_if(BlockElts, [](
int Elt) {
return Elt >= 0; });
8822 assert(FirstRealEltIter != BlockElts.end() &&
8823 "Shuffle with all-undefs must have been caught by previous cases, "
8825 if (FirstRealEltIter == BlockElts.end()) {
8831 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
8833 if ((
unsigned)*FirstRealEltIter < FirstRealIndex)
8836 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
8839 if (Elt0 % NumEltsPerBlock != 0)
8843 for (
size_t I = 0;
I < NumEltsPerBlock;
I++)
8844 if (BlockElts[
I] >= 0 && (
unsigned)BlockElts[
I] != Elt0 +
I)
8847 DupLaneOp = Elt0 / NumEltsPerBlock;
8856 const int *FirstRealElt =
find_if(
M, [](
int Elt) {
return Elt >= 0; });
8861 APInt ExpectedElt =
APInt(MaskBits, *FirstRealElt + 1);
8864 const int *FirstWrongElt =
std::find_if(FirstRealElt + 1,
M.end(),
8865 [&](
int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
8866 if (FirstWrongElt !=
M.end())
8896 "Only possible block sizes for REV are: 16, 32, 64");
8903 unsigned BlockElts =
M[0] + 1;
8911 for (
unsigned i = 0;
i < NumElts; ++
i) {
8914 if ((
unsigned)
M[
i] != (
i -
i % BlockElts) + (BlockElts - 1 -
i % BlockElts))
8923 if (NumElts % 2 != 0)
8925 WhichResult = (
M[0] == 0 ? 0 : 1);
8926 unsigned Idx = WhichResult * NumElts / 2;
8927 for (
unsigned i = 0;
i != NumElts;
i += 2) {
8928 if ((
M[
i] >= 0 && (
unsigned)
M[
i] != Idx) ||
8929 (
M[
i + 1] >= 0 && (unsigned)
M[
i + 1] != Idx + NumElts))
8939 WhichResult = (
M[0] == 0 ? 0 : 1);
8940 for (
unsigned i = 0;
i != NumElts; ++
i) {
8943 if ((
unsigned)
M[
i] != 2 *
i + WhichResult)
8952 if (NumElts % 2 != 0)
8954 WhichResult = (
M[0] == 0 ? 0 : 1);
8955 for (
unsigned i = 0;
i < NumElts;
i += 2) {
8956 if ((
M[
i] >= 0 && (
unsigned)
M[
i] !=
i + WhichResult) ||
8957 (
M[
i + 1] >= 0 && (unsigned)
M[
i + 1] !=
i + NumElts + WhichResult))
8968 if (NumElts % 2 != 0)
8970 WhichResult = (
M[0] == 0 ? 0 : 1);
8971 unsigned Idx = WhichResult * NumElts / 2;
8972 for (
unsigned i = 0;
i != NumElts;
i += 2) {
8973 if ((
M[
i] >= 0 && (
unsigned)
M[
i] != Idx) ||
8974 (
M[
i + 1] >= 0 && (unsigned)
M[
i + 1] != Idx))
8987 WhichResult = (
M[0] == 0 ? 0 : 1);
8988 for (
unsigned j = 0;
j != 2; ++
j) {
8989 unsigned Idx = WhichResult;
8990 for (
unsigned i = 0;
i != Half; ++
i) {
8991 int MIdx =
M[
i +
j * Half];
8992 if (MIdx >= 0 && (
unsigned)MIdx != Idx)
9006 if (NumElts % 2 != 0)
9008 WhichResult = (
M[0] == 0 ? 0 : 1);
9009 for (
unsigned i = 0;
i < NumElts;
i += 2) {
9010 if ((
M[
i] >= 0 && (
unsigned)
M[
i] !=
i + WhichResult) ||
9011 (
M[
i + 1] >= 0 && (unsigned)
M[
i + 1] !=
i + WhichResult))
9018 bool &DstIsLeft,
int &Anomaly) {
9019 if (
M.size() !=
static_cast<size_t>(NumInputElements))
9022 int NumLHSMatch = 0, NumRHSMatch = 0;
9023 int LastLHSMismatch = -1, LastRHSMismatch = -1;
9025 for (
int i = 0;
i < NumInputElements; ++
i) {
9035 LastLHSMismatch =
i;
9037 if (
M[
i] ==
i + NumInputElements)
9040 LastRHSMismatch =
i;
9043 if (NumLHSMatch == NumInputElements - 1) {
9045 Anomaly = LastLHSMismatch;
9047 }
else if (NumRHSMatch == NumInputElements - 1) {
9049 Anomaly = LastRHSMismatch;
9062 for (
int I = 0,
E = NumElts / 2;
I !=
E;
I++) {
9067 int Offset = NumElts / 2;
9068 for (
int I = NumElts / 2,
E = NumElts;
I !=
E;
I++) {
9078 EVT VT =
Op.getValueType();
9109 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9110 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
9111 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
9132 if (LHSID == (1 * 9 + 2) * 9 + 3)
9134 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 &&
"Illegal OP_COPY!");
9179 return DAG.
getNode(Opcode, dl, VT, OpLHS, Lane);
9216 EVT EltVT =
Op.getValueType().getVectorElementType();
9220 for (
int Val : ShuffleMask) {
9221 for (
unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
9222 unsigned Offset = Byte + Val * BytesPerElt;
9228 unsigned IndexLen = 8;
9229 if (
Op.getValueSizeInBits() == 128) {
9238 if (
V2.getNode()->isUndef()) {
9247 if (IndexLen == 8) {
9287 auto getScaledOffsetDup = [](
SDValue BitCast,
int &LaneC,
MVT &CastVT) {
9298 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
9300 if (ExtIdxInBits % CastedEltBitWidth != 0)
9304 LaneC += ExtIdxInBits / CastedEltBitWidth;
9311 unsigned SrcVecNumElts =
9318 if (getScaledOffsetDup(V, Lane, CastVT)) {
9341 EVT VT =
Op.getValueType();
9345 if (useSVEForFixedLengthVectorVT(VT))
9346 return LowerFixedLengthVECTOR_SHUFFLEToSVE(
Op, DAG);
9359 "Unexpected VECTOR_SHUFFLE mask size!");
9382 for (
unsigned LaneSize : {64U, 32U, 16U}) {
9394 V1 =
constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
9415 bool ReverseEXT =
false;
9417 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
9429 unsigned WhichResult;
9430 if (
isZIPMask(ShuffleMask, VT, WhichResult)) {
9434 if (
isUZPMask(ShuffleMask, VT, WhichResult)) {
9438 if (
isTRNMask(ShuffleMask, VT, WhichResult)) {
9462 if (
isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
9467 int SrcLane = ShuffleMask[Anomaly];
9468 if (SrcLane >= NumInputElements) {
9489 unsigned PFIndexes[4];
9490 for (
unsigned i = 0;
i != 4; ++
i) {
9491 if (ShuffleMask[
i] < 0)
9494 PFIndexes[
i] = ShuffleMask[
i];
9498 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
9499 PFIndexes[2] * 9 + PFIndexes[3];
9501 unsigned Cost = (PFEntry >> 30);
9513 EVT VT =
Op.getValueType();
9517 if (useSVEForFixedLengthVectorVT(VT))
9518 return LowerToScalableOp(
Op, DAG);
9526 if (
auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
9527 if (ConstVal->isOne())
9566 EVT VT =
Op.getValueType();
9579 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
9580 if (CIdx && (CIdx->getZExtValue() <= 3)) {
9612 APInt SplatBits, SplatUndef;
9613 unsigned SplatBitSize;
9615 if (BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9618 for (
unsigned i = 0;
i < NumSplats; ++
i) {
9619 CnstBits <<= SplatBitSize;
9620 UndefBits <<= SplatBitSize;
9622 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.
getSizeInBits());
9634 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
9635 uint64_t
Value =
Bits.zextOrTrunc(64).getZExtValue();
9636 EVT VT =
Op.getValueType();
9655 const SDValue *LHS =
nullptr) {
9656 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
9657 uint64_t
Value =
Bits.zextOrTrunc(64).getZExtValue();
9658 EVT VT =
Op.getValueType();
9660 bool isAdvSIMDModImm =
false;
9680 if (isAdvSIMDModImm) {
9685 Mov = DAG.
getNode(NewOp, dl, MovTy, *LHS,
9689 Mov = DAG.
getNode(NewOp, dl, MovTy,
9703 const SDValue *LHS =
nullptr) {
9704 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
9705 uint64_t
Value =
Bits.zextOrTrunc(64).getZExtValue();
9706 EVT VT =
Op.getValueType();
9708 bool isAdvSIMDModImm =
false;
9720 if (isAdvSIMDModImm) {
9725 Mov = DAG.
getNode(NewOp, dl, MovTy, *LHS,
9729 Mov = DAG.
getNode(NewOp, dl, MovTy,
9743 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
9744 uint64_t
Value =
Bits.zextOrTrunc(64).getZExtValue();
9745 EVT VT =
Op.getValueType();
9747 bool isAdvSIMDModImm =
false;
9759 if (isAdvSIMDModImm) {
9774 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
9775 uint64_t
Value =
Bits.zextOrTrunc(64).getZExtValue();
9776 EVT VT =
Op.getValueType();
9795 if (
Bits.getHiBits(64) ==
Bits.getLoBits(64)) {
9796 uint64_t
Value =
Bits.zextOrTrunc(64).getZExtValue();
9797 EVT VT =
Op.getValueType();
9800 bool isAdvSIMDModImm =
false;
9812 if (isAdvSIMDModImm) {
9827 uint64_t &ConstVal) {
9836 for (
unsigned i = 1;
i < NumElts; ++
i)
9837 if (dyn_cast<ConstantSDNode>(Bvec->
getOperand(
i)) != FirstElt)
9844 unsigned Opcode =
N->getOpcode();
9849 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
9850 if (IID < Intrinsic::num_intrinsics)
9864 EVT VT =
N->getValueType(0);
9874 SDValue FirstOp =
N->getOperand(0);
9875 unsigned FirstOpc = FirstOp.
getOpcode();
9876 SDValue SecondOp =
N->getOperand(1);
9877 unsigned SecondOpc = SecondOp.
getOpcode();
9912 assert(C1nodeImm && C1nodeShift);
9921 if (C2 > ElemSizeInBits)
9924 APInt C1AsAPInt(ElemSizeInBits,
C1);
9927 if (C1AsAPInt != RequiredC1)
9947 if (useSVEForFixedLengthVectorVT(
Op.getValueType()))
9948 return LowerToScalableOp(
Op, DAG);
9954 EVT VT =
Op.getValueType();
9958 dyn_cast<BuildVectorSDNode>(
Op.getOperand(1).getNode());
9961 LHS =
Op.getOperand(1);
9962 BVN = dyn_cast<BuildVectorSDNode>(
Op.getOperand(0).getNode());
9979 UndefBits, &LHS)) ||
9995 EVT VT =
Op.getValueType();
10007 if (
auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
10009 CstLane->getZExtValue());
10011 }
else if (Lane.getNode()->isUndef()) {
10015 "Unexpected BUILD_VECTOR operand type");
10017 Ops.push_back(Lane);
10023 EVT VT =
Op.getValueType();
10038 DefBits = ~DefBits;
10044 DefBits = UndefBits;
10053 DefBits = ~UndefBits;
10065 EVT VT =
Op.getValueType();
10079 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
10080 if (Val.isNullValue() || Val.isAllOnesValue())
10102 bool isOnlyLowElement =
true;
10103 bool usesOnlyOneValue =
true;
10104 bool usesOnlyOneConstantValue =
true;
10106 bool AllLanesExtractElt =
true;
10107 unsigned NumConstantLanes = 0;
10108 unsigned NumDifferentLanes = 0;
10109 unsigned NumUndefLanes = 0;
10112 for (
unsigned i = 0;
i < NumElts; ++
i) {
10115 AllLanesExtractElt =
false;
10121 isOnlyLowElement =
false;
10126 ++NumConstantLanes;
10127 if (!ConstantValue.
getNode())
10129 else if (ConstantValue != V)
10130 usesOnlyOneConstantValue =
false;
10133 if (!
Value.getNode())
10135 else if (V !=
Value) {
10136 usesOnlyOneValue =
false;
10137 ++NumDifferentLanes;
10141 if (!
Value.getNode()) {
10143 dbgs() <<
"LowerBUILD_VECTOR: value undefined, creating undef node\n");
10151 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: only low element used, creating 1 "
10152 "SCALAR_TO_VECTOR node\n");
10156 if (AllLanesExtractElt) {
10162 for (
unsigned i = 0;
i < NumElts; ++
i) {
10165 if (!isa<ConstantSDNode>(
N->getOperand(1)))
10185 uint64_t Val =
N->getConstantOperandVal(1);
10186 if (Val == 2 *
i) {
10190 if (Val - 1 == 2 *
i) {
10219 if (usesOnlyOneValue) {
10222 Value.getValueType() != VT) {
10224 dbgs() <<
"LowerBUILD_VECTOR: use DUP for non-constant splats\n");
10232 if (
Value.getValueSizeInBits() == 64) {
10234 dbgs() <<
"LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
10247 EltTy ==
MVT::f64) &&
"Unsupported floating-point vector type");
10249 dbgs() <<
"LowerBUILD_VECTOR: float constant splats, creating int "
10250 "BITCASTS, and try again\n");
10252 for (
unsigned i = 0;
i < NumElts; ++
i)
10256 LLVM_DEBUG(
dbgs() <<
"LowerBUILD_VECTOR: trying to lower new vector: ";
10258 Val = LowerBUILD_VECTOR(Val, DAG);
10268 bool PreferDUPAndInsert =
10270 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
10271 NumDifferentLanes >= NumConstantLanes;
10277 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
10288 for (
unsigned i = 0;
i < NumElts; ++
i) {
10302 dbgs() <<
"LowerBUILD_VECTOR: all elements are constant, use default "
10308 if (NumElts >= 4) {
10313 if (PreferDUPAndInsert) {
10318 for (
unsigned I = 0;
I < NumElts; ++
I)
10335 dbgs() <<
"LowerBUILD_VECTOR: alternatives failed, creating sequence "
10336 "of INSERT_VECTOR_ELT\n");
10353 LLVM_DEBUG(
dbgs() <<
"Creating node for op0, it is not undefined:\n");
10358 <<
"Creating nodes for the other vector elements:\n";);
10359 for (;
i < NumElts; ++
i) {
10370 dbgs() <<
"LowerBUILD_VECTOR: use default expansion, failed to find "
10371 "better alternative\n");
10377 if (useSVEForFixedLengthVectorVT(
Op.getValueType()))
10378 return LowerFixedLengthConcatVectorsToSVE(
Op, DAG);
10380 assert(
Op.getValueType().isScalableVector() &&
10382 "Expected legal scalable vector type!");
10384 if (
isTypeLegal(
Op.getOperand(0).getValueType()) &&
Op.getNumOperands() == 2)
10394 if (useSVEForFixedLengthVectorVT(
Op.getValueType()))
10395 return LowerFixedLengthInsertVectorElt(
Op, DAG);
10398 EVT VT =
Op.getOperand(0).getValueType();
10412 ExtendedValue,
Op.getOperand(2));
10438 Op.getOperand(1),
Op.getOperand(2));
10444 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(
SDValue Op,
10447 EVT VT =
Op.getOperand(0).getValueType();
10458 Extend,
Op.getOperand(1));
10462 if (useSVEForFixedLengthVectorVT(VT))
10463 return LowerFixedLengthExtractVectorElt(
Op, DAG);
10498 assert(
Op.getValueType().isFixedLengthVector() &&
10499 "Only cases that extract a fixed length vector are supported!");
10501 EVT InVT =
Op.getOperand(0).getValueType();
10502 unsigned Idx = cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue();
10503 unsigned Size =
Op.getValueSizeInBits();
10528 assert(
Op.getValueType().isScalableVector() &&
10529 "Only expect to lower inserts into scalable vectors!");
10531 EVT InVT =
Op.getOperand(1).getValueType();
10532 unsigned Idx = cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue();
10536 EVT VT =
Op.getValueType();
10571 EVT VT =
Op.getValueType();
10573 if (useSVEForFixedLengthVectorVT(VT,
true))
10574 return LowerFixedLengthVectorIntDivideToSVE(
Op, DAG);
10582 return LowerToPredicatedOp(
Op, DAG, PredOpcode);
10601 SDValue ResultLo = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
10602 SDValue ResultHi = DAG.
getNode(
Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
10608 if (useSVEForFixedLengthVectorVT(VT))
10613 unsigned PFIndexes[4];
10614 for (
unsigned i = 0;
i != 4; ++
i) {
10618 PFIndexes[
i] =
M[
i];
10622 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10623 PFIndexes[2] * 9 + PFIndexes[3];
10625 unsigned Cost = (PFEntry >> 30);
10633 unsigned DummyUnsigned;
10637 isEXTMask(
M, VT, DummyBool, DummyUnsigned) ||
10654 Op =
Op.getOperand(0);
10656 APInt SplatBits, SplatUndef;
10657 unsigned SplatBitSize;
10659 if (!BVN || !BVN->
isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
10660 HasAnyUndefs, ElementBits) ||
10661 SplatBitSize > ElementBits)
10672 assert(VT.
isVector() &&
"vector shift count is not a vector type");
10676 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
10683 assert(VT.
isVector() &&
"vector shift count is not a vector type");
10687 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
10692 EVT VT =
Op.getValueType();
10697 EVT OpVT =
Op.getOperand(0).getValueType();
10707 if (useSVEForFixedLengthVectorVT(
Op.getOperand(0).getValueType()))
10708 return LowerFixedLengthVectorTruncateToSVE(
Op, DAG);
10715 EVT VT =
Op.getValueType();
10719 if (!
Op.getOperand(1).getValueType().isVector())
10723 switch (
Op.getOpcode()) {
10731 if (
isVShiftLImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize)
10737 Op.getOperand(0),
Op.getOperand(1));
10743 return LowerToPredicatedOp(
Op, DAG, Opc);
10747 if (
isVShiftRImm(
Op.getOperand(1), VT,
false, Cnt) && Cnt < EltSize) {
10750 return DAG.
getNode(Opc,
DL, VT,
Op.getOperand(0),
10757 unsigned Opc = (
Op.getOpcode() ==
ISD::SRA) ? Intrinsic::aarch64_neon_sshl
10758 : Intrinsic::aarch64_neon_ushl;
10766 return NegShiftLeft;
10777 "function only supposed to emit natural comparisons");
10783 bool IsZero = IsCnst && (CnstBits == 0);
10795 return DAG.
getNOT(dl, Fcmeq, VT);
10834 return DAG.
getNOT(dl, Cmeq, VT);
10869 if (
Op.getValueType().isScalableVector())
10872 if (useSVEForFixedLengthVectorVT(
Op.getOperand(0).getValueType()))
10873 return LowerFixedLengthVectorSetccToSVE(
Op, DAG);
10889 const bool FullFP16 =
10958 useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
10961 return LowerPredReductionToSVE(
Op, DAG);
10963 switch (
Op.getOpcode()) {
10993 switch (
Op.getOpcode()) {
11029 MVT VT =
Op.getSimpleValueType();
11034 Op.getOperand(0),
Op.getOperand(1), RHS,
11046 MVT VT =
Op.getSimpleValueType();
11051 Op.getOperand(0),
Op.getOperand(1), RHS,
11055 SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
11084 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(
SDValue Op,
11087 "Only Windows alloca probing supported");
11094 cast<ConstantSDNode>(
Op.getOperand(2))->getMaybeAlignValue();
11095 EVT VT = Node->getValueType(0);
11098 "no-stack-arg-probe")) {
11106 SDValue Ops[2] = {SP, Chain};
11112 Chain = LowerWindowsDYNAMIC_STACKALLOC(
Op, Chain,
Size, DAG);
11125 SDValue Ops[2] = {SP, Chain};
11131 EVT VT =
Op.getValueType();
11135 APInt MulImm = cast<ConstantSDNode>(
Op.getOperand(0))->getAPIntValue();
11141 template <
unsigned NumVecs>
11144 AArch64TargetLowering::IntrinsicInfo &
Info,
const CallInst &CI) {
11151 for (
unsigned I = 0;
I < NumVecs; ++
I)
11160 Info.align.reset();
11171 unsigned Intrinsic)
const {
11172 auto &
DL =
I.getModule()->getDataLayout();
11173 switch (Intrinsic) {
11174 case Intrinsic::aarch64_sve_st2:
11175 return setInfoSVEStN<2>(*
this,
DL,
Info,
I);
11176 case Intrinsic::aarch64_sve_st3:
11177 return setInfoSVEStN<3>(*
this,
DL,
Info,
I);
11178 case Intrinsic::aarch64_sve_st4:
11179 return setInfoSVEStN<4>(*
this,
DL,
Info,
I);
11180 case Intrinsic::aarch64_neon_ld2:
11181 case Intrinsic::aarch64_neon_ld3:
11182 case Intrinsic::aarch64_neon_ld4:
11183 case Intrinsic::aarch64_neon_ld1x2:
11184 case Intrinsic::aarch64_neon_ld1x3:
11185 case Intrinsic::aarch64_neon_ld1x4:
11186 case Intrinsic::aarch64_neon_ld2lane:
11187 case Intrinsic::aarch64_neon_ld3lane:
11188 case Intrinsic::aarch64_neon_ld4lane:
11189 case Intrinsic::aarch64_neon_ld2r:
11190 case Intrinsic::aarch64_neon_ld3r:
11191 case Intrinsic::aarch64_neon_ld4r: {
11194 uint64_t NumElts =
DL.getTypeSizeInBits(
I.getType()) / 64;
11196 Info.ptrVal =
I.getArgOperand(
I.getNumArgOperands() - 1);
11198 Info.align.reset();
11203 case Intrinsic::aarch64_neon_st2:
11204 case Intrinsic::aarch64_neon_st3:
11205 case Intrinsic::aarch64_neon_st4:
11206 case Intrinsic::aarch64_neon_st1x2:
11207 case Intrinsic::aarch64_neon_st1x3:
11208 case Intrinsic::aarch64_neon_st1x4:
11209 case Intrinsic::aarch64_neon_st2lane:
11210 case Intrinsic::aarch64_neon_st3lane:
11211 case Intrinsic::aarch64_neon_st4lane: {
11214 unsigned NumElts = 0;
11215 for (
unsigned ArgI = 0, ArgE =
I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
11216 Type *ArgTy =
I.getArgOperand(ArgI)->getType();
11219 NumElts +=
DL.getTypeSizeInBits(ArgTy) / 64;
11222 Info.ptrVal =
I.getArgOperand(
I.getNumArgOperands() - 1);
11224 Info.align.reset();
11229 case Intrinsic::aarch64_ldaxr:
11230 case Intrinsic::aarch64_ldxr: {
11231 PointerType *PtrTy = cast<PointerType>(
I.getArgOperand(0)->getType());
11234 Info.ptrVal =
I.getArgOperand(0);
11240 case Intrinsic::aarch64_stlxr:
11241 case Intrinsic::aarch64_stxr: {
11242 PointerType *PtrTy = cast<PointerType>(
I.getArgOperand(1)->getType());
11245 Info.ptrVal =
I.getArgOperand(1);
11251 case Intrinsic::aarch64_ldaxp:
11252 case Intrinsic::aarch64_ldxp:
11255 Info.ptrVal =
I.getArgOperand(0);
11260 case Intrinsic::aarch64_stlxp:
11261 case Intrinsic::aarch64_stxp:
11264 Info.ptrVal =
I.getArgOperand(2);
11269 case Intrinsic::aarch64_sve_ldnt1: {
11270 PointerType *PtrTy = cast<PointerType>(
I.getArgOperand(1)->getType());
11273 Info.ptrVal =
I.getArgOperand(1);
11277 if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
11281 case Intrinsic::aarch64_sve_stnt1: {
11282 PointerType *PtrTy = cast<PointerType>(
I.getArgOperand(2)->getType());
11285 Info.ptrVal =
I.getArgOperand(2);
11289 if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
11318 Base.getOperand(1).hasOneUse() &&
11322 uint64_t ShiftAmount =
Base.getOperand(1).getConstantOperandVal(1);
11324 if (ShiftAmount ==
Log2_32(LoadBytes))
11337 return NumBits1 > NumBits2;
11344 return NumBits1 > NumBits2;
11351 if (
I->getOpcode() != Instruction::FMul)
11354 if (!
I->hasOneUse())
11360 !(
User->getOpcode() == Instruction::FSub ||
11361 User->getOpcode() == Instruction::FAdd))
11382 return NumBits1 == 32 && NumBits2 == 64;
11389 return NumBits1 == 32 && NumBits2 == 64;
11407 bool AArch64TargetLowering::isExtFreeImpl(
const Instruction *
Ext)
const {
11408 if (isa<FPExtInst>(
Ext))
11412 if (
Ext->getType()->isVectorTy())
11415 for (
const Use &U :
Ext->uses()) {
11420 const Instruction *Instr = cast<Instruction>(U.getUser());
11424 case Instruction::Shl:
11425 if (!isa<ConstantInt>(Instr->
getOperand(1)))
11428 case Instruction::GetElementPtr: {
11430 auto &
DL =
Ext->getModule()->getDataLayout();
11431 std::advance(GTI, U.getOperandNo()-1);
11437 uint64_t ShiftAmt =
11441 if (ShiftAmt == 0 || ShiftAmt > 4)
11445 case Instruction::Trunc:
11448 if (Instr->
getType() ==
Ext->getOperand(0)->getType())
11464 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
11465 auto *FullTy = FullV->
getType();
11466 auto *HalfTy = HalfV->getType();
11468 2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
11471 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
11472 auto *FullVT = cast<FixedVectorType>(FullV->
getType());
11473 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
11474 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
11478 Value *S1Op1, *S2Op1;
11485 if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
11486 !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
11493 int NumElements = cast<FixedVectorType>(Op1->
getType())->getNumElements() * 2;
11496 M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
11506 return Ext->getType()->getScalarSizeInBits() ==
11507 2 *
Ext->getOperand(0)->getType()->getScalarSizeInBits();
11512 !areExtDoubled(cast<Instruction>(Ext1)) ||
11513 !areExtDoubled(cast<Instruction>(Ext2)))
11521 Value *VectorOperand =
nullptr;
11526 isa<FixedVectorType>(VectorOperand->
getType()) &&
11527 cast<FixedVectorType>(VectorOperand->
getType())->getNumElements() == 2;
11540 if (!
I->getType()->isVectorTy())
11544 switch (II->getIntrinsicID()) {
11545 case Intrinsic::aarch64_neon_umull:
11548 Ops.push_back(&II->getOperandUse(0));
11549 Ops.push_back(&II->getOperandUse(1));
11552 case Intrinsic::aarch64_neon_pmull64:
11554 II->getArgOperand(1)))
11556 Ops.push_back(&II->getArgOperandUse(0));
11557 Ops.push_back(&II->getArgOperandUse(1));
11565 switch (
I->getOpcode()) {
11566 case Instruction::Sub:
11573 auto Ext1 = cast<Instruction>(
I->getOperand(0));
11574 auto Ext2 = cast<Instruction>(
I->getOperand(1));
11576 Ops.push_back(&Ext1->getOperandUse(0));
11577 Ops.push_back(&Ext2->getOperandUse(0));
11580 Ops.push_back(&
I->getOperandUse(0));
11581 Ops.push_back(&
I->getOperandUse(1));
11585 case Instruction::Mul: {
11586 bool IsProfitable =
false;
11587 for (
auto &
Op :
I->operands()) {
11589 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op; }))
11606 dyn_cast<ConstantInt>(
Insert->getOperand(2));
11608 if (!ElementConstant || ElementConstant->
getZExtValue() != 0)
11611 unsigned Opcode = OperandInstr->
getOpcode();
11612 if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
11616 Ops.push_back(&
Op);
11617 IsProfitable =
true;
11620 return IsProfitable;
11629 Align &RequiredAligment)
const {
11634 RequiredAligment =
Align(1);
11636 return NumBits == 32 || NumBits == 64;
11644 return (
DL.getTypeSizeInBits(VecTy) + 127) / 128;
11658 unsigned VecSize =
DL.getTypeSizeInBits(VecTy);
11662 if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
11666 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
11671 return VecSize == 64 || VecSize % 128 == 0;
11689 "Invalid interleave factor");
11690 assert(!Shuffles.
empty() &&
"Empty shufflevector input");
11692 "Unmatched number of shufflevectors and indices");
11706 auto *FVTy = cast<FixedVectorType>(VTy);
11710 Type *EltTy = FVTy->getElementType();
11720 if (NumLoads > 1) {
11724 FVTy->getNumElements() / NumLoads);
11729 BaseAddr =
Builder.CreateBitCast(
11735 Type *Tys[2] = {FVTy, PtrTy};
11736 static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
11737 Intrinsic::aarch64_neon_ld3,
11738 Intrinsic::aarch64_neon_ld4};
11747 for (
unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
11752 BaseAddr =
Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
11753 FVTy->getNumElements() * Factor);
11756 LdNFunc,
Builder.CreateBitCast(BaseAddr, PtrTy),
"ldN");
11759 for (
unsigned i = 0;
i < Shuffles.
size();
i++) {
11761 unsigned Index = Indices[
i];
11767 SubVec =
Builder.CreateIntToPtr(
11769 FVTy->getNumElements()));
11770 SubVecs[SVI].push_back(SubVec);
11779 auto &SubVec = SubVecs[SVI];
11782 SVI->replaceAllUsesWith(WideVec);
11816 unsigned Factor)
const {
11818 "Invalid interleave factor");
11820 auto *VecTy = cast<FixedVectorType>(SVI->
getType());
11821 assert(VecTy->getNumElements() % Factor == 0 &&
"Invalid interleaved store");
11823 unsigned LaneLen = VecTy->getNumElements() / Factor;
11824 Type *EltTy = VecTy->getElementType();
11844 Type *IntTy =
DL.getIntPtrType(EltTy);
11845 unsigned NumOpElts =
11846 cast<FixedVectorType>(Op0->
getType())->getNumElements();
11850 Op0 =
Builder.CreatePtrToInt(Op0, IntVecTy);
11851 Op1 =
Builder.CreatePtrToInt(Op1, IntVecTy);
11857 Value *BaseAddr =
SI->getPointerOperand();
11859 if (NumStores > 1) {
11862 LaneLen /= NumStores;
11868 BaseAddr =
Builder.CreateBitCast(
11870 SubVecTy->getElementType()->getPointerTo(
SI->getPointerAddressSpace()));
11875 Type *PtrTy = SubVecTy->getPointerTo(
SI->getPointerAddressSpace());
11876 Type *Tys[2] = {SubVecTy, PtrTy};
11877 static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
11878 Intrinsic::aarch64_neon_st3,
11879 Intrinsic::aarch64_neon_st4};
11883 for (
unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
11888 for (
unsigned i = 0;
i < Factor;
i++) {
11889 unsigned IdxI = StoreCount * LaneLen * Factor +
i;
11890 if (
Mask[IdxI] >= 0) {
11891 Ops.push_back(
Builder.CreateShuffleVector(
11894 unsigned StartMask = 0;
11895 for (
unsigned j = 1;
j < LaneLen;
j++) {
11896 unsigned IdxJ = StoreCount * LaneLen * Factor +
j;
11897 if (
Mask[IdxJ * Factor + IdxI] >= 0) {
11898 StartMask =
Mask[IdxJ * Factor + IdxI] - IdxJ;
11907 Ops.push_back(
Builder.CreateShuffleVector(
11914 if (StoreCount > 0)
11915 BaseAddr =
Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
11916 BaseAddr, LaneLen * Factor);
11918 Ops.push_back(
Builder.CreateBitCast(BaseAddr, PtrTy));
11919 Builder.CreateCall(StNFunc, Ops);
11944 SDValue AArch64TargetLowering::LowerSVEStructLoad(
unsigned Intrinsic,
11950 unsigned N, Opcode;
11951 static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
11956 std::tie(
N, Opcode) = IntrinsicMap[
Intrinsic];
11958 "invalid tuple vector type!");
11971 for (
unsigned I = 0;
I <
N; ++
I)
11978 bool CanImplicitFloat =
11980 bool CanUseNEON = Subtarget->
hasNEON() && CanImplicitFloat;
11981 bool CanUseFP = Subtarget->
hasFPARMv8() && CanImplicitFloat;
11985 bool IsSmallMemset =
Op.isMemset() &&
Op.size() < 32;
11986 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
11987 if (
Op.isAligned(AlignCheck))
11995 if (CanUseNEON &&
Op.isMemset() && !IsSmallMemset &&
11998 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(
MVT::f128,
Align(16)))
12009 bool CanImplicitFloat =
12011 bool CanUseNEON = Subtarget->
hasNEON() && CanImplicitFloat;
12012 bool CanUseFP = Subtarget->
hasFPARMv8() && CanImplicitFloat;
12016 bool IsSmallMemset =
Op.isMemset() &&
Op.size() < 32;
12017 auto AlignmentIsAcceptable = [&](
EVT VT,
Align AlignCheck) {
12018 if (
Op.isAligned(AlignCheck))
12026 if (CanUseNEON &&
Op.isMemset() && !IsSmallMemset &&
12029 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(
MVT::f128,
Align(16)))
12042 <<
": avoid UB for INT64_MIN\n");
12047 bool IsLegal = ((Immed >> 12) == 0 ||
12048 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
12050 <<
" legal add imm: " << (IsLegal ?
"yes" :
"no") <<
"\n");
12081 if (isa<ScalableVectorType>(Ty)) {
12082 uint64_t VecElemNumBytes =
12083 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
12085 (AM.
Scale == 0 || (uint64_t)AM.
Scale == VecElemNumBytes);
12090 uint64_t NumBytes = 0;
12092 uint64_t NumBits =
DL.getTypeSizeInBits(Ty);
12093 NumBytes = NumBits / 8;
12107 if (NumBytes &&
Offset > 0 && (
Offset / NumBytes) <= (1LL << 12) - 1 &&
12116 return AM.
Scale == 1 || (AM.
Scale > 0 && (uint64_t)AM.
Scale == NumBytes);
12181 static const MCPhysReg ScratchRegs[] = {
12182 AArch64::X16, AArch64::X17, AArch64::LR, 0
12184 return ScratchRegs;
12190 N =
N->getOperand(0).getNode();
12191 EVT VT =
N->getValueType(0);
12195 isa<ConstantSDNode>(
N->getOperand(1))) {
12196 uint64_t TruncMask =
N->getConstantOperandVal(1);
12198 N->getOperand(0).getOpcode() ==
ISD::SRL &&
12199 isa<ConstantSDNode>(
N->getOperand(0)->getOperand(1)))
12217 if ((int64_t)Val < 0)
12220 Val &= (1LL << 32) - 1;
12223 unsigned Shift = (63 - LZ) / 16;
12229 unsigned Index)
const {
12242 EVT VT =
N->getValueType(0);
12255 auto *ShiftAmt = dyn_cast<ConstantSDNode>(
Shift.getOperand(1));
12256 EVT ShiftEltTy =
Shift.getValueType().getVectorElementType();
12257 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.
getSizeInBits() - 1)
12284 SDValue VecReduceOp0 =
N->getOperand(0);
12285 unsigned Opcode = VecReduceOp0.
getOpcode();
12292 if (
ABS->getOperand(0)->getOpcode() !=
ISD::SUB ||
12297 unsigned Opcode0 =
SUB->getOperand(0).getOpcode();
12298 unsigned Opcode1 =
SUB->getOperand(1).getOpcode();
12305 bool IsZExt =
false;
12331 UABDHigh8Op0, UABDHigh8Op1);
12342 UABDLo8Op0, UABDLo8Op1);
12358 if (!
ST->hasDotProd())
12372 if (
A.getOpcode() !=
B.getOpcode() ||
12373 A.getOperand(0).getValueType() !=
B.getOperand(0).getValueType())
12375 ExtOpcode =
A.getOpcode();
12380 EVT Op0VT =
A.getOperand(0).getValueType();
12390 B =
B.getOperand(0);
12397 A.getOperand(0),
B);
12411 AArch64TargetLowering::BuildSDIVPow2(
SDNode *
N,
const APInt &Divisor,
12419 EVT VT =
N->getValueType(0);
12421 !(Divisor.
isPowerOf2() || (-Divisor).isPowerOf2()))
12436 Created.push_back(Cmp.
getNode());
12437 Created.push_back(
Add.getNode());
12438 Created.push_back(CSel.
getNode());
12449 Created.push_back(
SRA.getNode());
12457 case Intrinsic::aarch64_sve_cntb:
12458 case Intrinsic::aarch64_sve_cnth:
12459 case Intrinsic::aarch64_sve_cntw:
12460 case Intrinsic::aarch64_sve_cntd:
12489 return TypeNode->
getVT();
12499 if (
Mask == UCHAR_MAX)
12501 else if (
Mask == USHRT_MAX)
12503 else if (
Mask == UINT_MAX)
12521 dyn_cast<ShuffleVectorSDNode>(VectorShuffle.
getNode());
12541 unsigned ExtendOpcode = Extend.
getOpcode();
12582 DAG.
getUNDEF(PreExtendVT), ShuffleMask);
12585 DL, TargetType, VectorShuffleNode);
12621 if (!isa<ConstantSDNode>(
N->getOperand(1)))
12626 const APInt &ConstValue =
C->getAPIntValue();
12633 if (ConstValue.
sge(1) && ConstValue.
sle(16))
12650 if (TrailingZeroes) {
12658 if (
N->hasOneUse() && (
N->use_begin()->getOpcode() ==
ISD::ADD ||
12659 N->use_begin()->getOpcode() ==
ISD::SUB))
12664 APInt ShiftedConstValue = ConstValue.
ashr(TrailingZeroes);
12666 unsigned ShiftAmt, AddSubOpc;
12668 bool ShiftValUseIsN0 =
true;
12670 bool NegateResult =
false;
12676 APInt SCVMinus1 = ShiftedConstValue - 1;
12677 APInt CVPlus1 = ConstValue + 1;
12689 APInt CVNegPlus1 = -ConstValue + 1;
12690 APInt CVNegMinus1 = -ConstValue - 1;
12694 ShiftValUseIsN0 =
false;
12696 ShiftAmt = CVNegMinus1.
logBase2();
12698 NegateResult =
true;
12704 EVT VT =
N->getValueType(0);
12708 SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
12709 SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
12711 assert(!(NegateResult && TrailingZeroes) &&
12712 "NegateResult and TrailingZeroes cannot both be true for now.");
12717 if (TrailingZeroes)
12736 EVT VT =
N->getValueType(0);
12738 N->getOperand(0)->getOperand(0)->getOpcode() !=
ISD::SETCC ||
12739 VT.
getSizeInBits() !=
N->getOperand(0)->getValueType(0).getSizeInBits())
12747 dyn_cast<BuildVectorSDNode>(
N->getOperand(0)->getOperand(1))) {
12749 if (!BV->isConstant())
12754 EVT IntVT = BV->getValueType(0);
12761 N->getOperand(0)->getOperand(0), MaskConst);
12776 EVT VT =
N->getValueType(0);
12781 if (VT.
getSizeInBits() !=
N->getOperand(0).getValueSizeInBits())
12790 !cast<LoadSDNode>(N0)->isVolatile()) {
12816 if (!
N->getValueType(0).isSimple())
12820 if (!
Op.getValueType().isVector() || !
Op.getValueType().isSimple() ||
12825 if (!isa<BuildVectorSDNode>(ConstVec))
12828 MVT FloatTy =
Op.getSimpleValueType().getVectorElementType();
12830 if (FloatBits != 32 && FloatBits != 64)
12833 MVT IntTy =
N->getSimpleValueType(0).getVectorElementType();
12835 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12839 if (IntBits > FloatBits)
12844 int32_t
Bits = IntBits == 64 ? 64 : 32;
12846 if (
C == -1 ||
C == 0 ||
C >
Bits)
12850 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
12851 switch (NumLanes) {
12866 "Illegal vector type after legalization");
12870 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
12871 : Intrinsic::aarch64_neon_vcvtfp2fxu;
12877 if (IntBits < FloatBits)
12892 unsigned Opc =
Op->getOpcode();
12893 if (!
Op.getValueType().isVector() || !
Op.getValueType().isSimple() ||
12894 !
Op.getOperand(0).getValueType().isSimple() ||
12898 SDValue ConstVec =
N->getOperand(1);
12899 if (!isa<BuildVectorSDNode>(ConstVec))
12902 MVT IntTy =
Op.getOperand(0).getSimpleValueType().getVectorElementType();
12904 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12907 MVT FloatTy =
N->getSimpleValueType(0).getVectorElementType();
12909 if (FloatBits != 32 && FloatBits != 64)
12913 if (IntBits > FloatBits)
12919 if (
C == -1 ||
C == 0 ||
C > FloatBits)
12923 unsigned NumLanes =
Op.getValueType().getVectorNumElements();
12924 switch (NumLanes) {
12941 if (IntBits < FloatBits)
12945 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
12946 : Intrinsic::aarch64_neon_vcvtfxu2fp;
12963 if (!isa<ConstantSDNode>(
N.getOperand(1)))
12966 ShiftAmount =
N->getConstantOperandVal(1);
12967 Src =
N->getOperand(0);
12980 EVT VT =
N->getValueType(0);
12989 bool LHSFromHi =
false;
12990 if (!
findEXTRHalf(
N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
12995 bool RHSFromHi =
false;
12996 if (!
findEXTRHalf(
N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
13001 if (LHSFromHi == RHSFromHi)
13018 EVT VT =
N->getValueType(0);
13041 for (
int i = 1;
i >= 0; --
i) {
13042 for (
int j = 1;
j >= 0; --
j) {
13079 uint64_t BitMask =
Bits == 64 ? -1ULL : ((1ULL <<
Bits) - 1);
13080 for (
int i = 1;
i >= 0; --
i)
13081 for (
int j = 1;
j >= 0; --
j) {
13084 if (!BVN0 || !BVN1)
13087 bool FoundMatch =
true;
13091 if (!CN0 || !CN1 ||
13093 FoundMatch =
false;
13110 EVT VT =
N->getValueType(0);
13128 uint64_t MaskForTy = 0ull;
13131 MaskForTy = 0xffull;
13134 MaskForTy = 0xffffull;
13137 MaskForTy = 0xffffffffull;
13145 if (
auto *Op0 = dyn_cast<ConstantSDNode>(
N->getOperand(0)))
13146 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
13170 uint64_t ExtVal =
C->getZExtValue();
13175 if ((ExtVal == 0xFF && EltTy ==
MVT::i8) ||
13176 (ExtVal == 0xFFFF && EltTy ==
MVT::i16) ||
13177 (ExtVal == 0xFFFFFFFF && EltTy ==
MVT::i32))
13192 return DAG.
getNode(Opc,
DL,
N->getValueType(0), And);
13211 MemVT = cast<VTSDNode>(Src->
getOperand(3))->getVT();
13228 MemVT = cast<VTSDNode>(Src->
getOperand(4))->getVT();
13244 EVT VT =
N->getValueType(0);
13257 dyn_cast<BuildVectorSDNode>(
N->getOperand(1).getNode());
13270 DefBits = ~DefBits;
13277 UndefBits = ~UndefBits;
13279 UndefBits, &LHS)) ||
13291 EVT VT =
N->getValueType(0);
13304 uint64_t ShiftAmt =
C->getZExtValue();
13305 if (VT ==
MVT::i32 && ShiftAmt == 16 &&
13308 if (VT ==
MVT::i64 && ShiftAmt == 32 &&
13330 EVT VT =
N->getValueType(0);
13342 uint64_t ShiftAmount =
Shift.getConstantOperandVal(1);
13343 if (ShiftAmount != 1)
13346 SDValue ExtendOpA, ExtendOpB;
13348 unsigned ShiftOp0Opc = ShiftOp0.
getOpcode();
13361 APInt CAsAPInt(ElemSizeInBits,
C);
13367 }
else if (ShiftOp0Opc ==
ISD::ADD) {
13373 unsigned ExtendOpAOpc = ExtendOpA.
getOpcode();
13374 unsigned ExtendOpBOpc = ExtendOpB.
getOpcode();
13375 if (!(ExtendOpAOpc == ExtendOpBOpc &&
13390 bool IsRHADD = ShiftOp0Opc ==
ISD::SUB;
13391 unsigned HADDOpc = IsSignExtend
13411 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
13414 EVT VT =
N->getValueType(0);
13415 const bool FullFP16 =
13436 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
13440 if (Shuffle && Shuffle->
getMaskElt(0) == 1 &&
13457 EVT VT =
N->getValueType(0);
13458 SDValue N0 =
N->getOperand(0), N1 =
N->getOperand(1);
13483 for (
size_t i = 0;
i <
Mask.size(); ++
i)
13509 if (
N->getNumOperands() == 2 && N0Opc == N1Opc &&
13529 if (N00Source == N10Source && N01Source == N11Source &&
13538 if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
13540 return DAG.
getNode(N0Opc, dl, VT, N00Source, N01Source);
13572 dbgs() <<
"aarch64-lower: concat_vectors bitcast simplification\n");
13606 EVT ResTy =
N->getValueType(0);
13614 "unexpected vector size on extract_vector_elt!");
13645 switch (
N.getOpcode()) {
13665 MVT NarrowTy =
N.getSimpleValueType();
13675 DAG.
getNode(
N->getOpcode(), dl, NewVT,
N->ops()),
13681 N =
N.getOperand(0);
13684 if (
N.getOperand(0).getValueType().isScalableVector())
13686 return cast<ConstantSDNode>(
N.getOperand(1))->getAPIntValue() ==
13687 N.getOperand(0).getValueType().getVectorNumElements() / 2;
13743 cast<ConstantSDNode>(
Op.getOperand(2))->getZExtValue());
13752 if (!TValue || !FValue)
13756 if (!TValue->
isOne()) {
13820 EVT VT =
Op->getValueType(0);
13827 EVT VT =
N->getValueType(0);
13838 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->
getOperand(1));
13839 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->
getOperand(1));
13840 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
13864 EVT VT =
N->getValueType(0);
13871 auto isZeroDot = [](
SDValue Dot) {
13876 if (!isZeroDot(Dot))
13878 if (!isZeroDot(Dot))
13902 MVT VT =
N->getSimpleValueType(0);
13967 "unexpected shape for long operation");
13983 return DAG.
getNode(
N->getOpcode(),
SDLoc(
N),
N->getValueType(0), LHS, RHS);
13986 N->getOperand(0), LHS, RHS);
13990 MVT ElemTy =
N->getSimpleValueType(0).getScalarType();
13993 int64_t ShiftAmount;
13995 APInt SplatValue, SplatUndef;
13996 unsigned SplatBitSize;
13999 HasAnyUndefs, ElemBits) ||
14000 SplatBitSize != ElemBits)
14004 }
else if (
ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(
N->getOperand(2))) {
14005 ShiftAmount = CVN->getSExtValue();
14014 case Intrinsic::aarch64_neon_sqshl:
14016 IsRightShift =
false;
14018 case Intrinsic::aarch64_neon_uqshl:
14020 IsRightShift =
false;
14022 case Intrinsic::aarch64_neon_srshl:
14024 IsRightShift =
true;
14026 case Intrinsic::aarch64_neon_urshl:
14028 IsRightShift =
true;
14030 case Intrinsic::aarch64_neon_sqshlu:
14032 IsRightShift =
false;
14034 case Intrinsic::aarch64_neon_sshl:
14035 case Intrinsic::aarch64_neon_ushl:
14040 IsRightShift =
false;
14044 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(
int)ElemBits) {
14046 return DAG.
getNode(Opcode, dl,
N->getValueType(0),
N->getOperand(1),
14048 }
else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
14050 return DAG.
getNode(Opcode, dl,
N->getValueType(0),
N->getOperand(1),
14070 N->getOperand(0),
N->getOperand(1), AndN.
getOperand(0));
14078 N->getOperand(1).getSimpleValueType(),
14107 SDValue Passthru =
N->getOperand(1);
14110 Pred,
Scalar, Passthru);
14116 EVT VT =
N->getValueType(0);
14145 SDValue Comparator =
N->getOperand(3);
14149 EVT VT =
N->getValueType(0);
14150 EVT CmpVT =
N->getOperand(2).getValueType();
14161 case Intrinsic::aarch64_sve_cmpeq_wide:
14162 case Intrinsic::aarch64_sve_cmpne_wide:
14163 case Intrinsic::aarch64_sve_cmpge_wide:
14164 case Intrinsic::aarch64_sve_cmpgt_wide:
14165 case Intrinsic::aarch64_sve_cmplt_wide:
14166 case Intrinsic::aarch64_sve_cmple_wide: {
14167 if (
auto *CN = dyn_cast<ConstantSDNode>(Comparator.
getOperand(0))) {
14168 int64_t ImmVal = CN->getSExtValue();
14169 if (ImmVal >= -16 && ImmVal <= 15)
14177 case Intrinsic::aarch64_sve_cmphs_wide:
14178 case Intrinsic::aarch64_sve_cmphi_wide:
14179 case Intrinsic::aarch64_sve_cmplo_wide:
14180 case Intrinsic::aarch64_sve_cmpls_wide: {
14181 if (
auto *CN = dyn_cast<ConstantSDNode>(Comparator.
getOperand(0))) {
14182 uint64_t ImmVal = CN->getZExtValue();
14208 assert(
Op.getValueType().isScalableVector() &&
14210 "Expected legal scalable vector type!");
14232 SDValue VecToReduce =
N->getOperand(2);
14251 SDValue VecToReduce =
N->getOperand(2);
14268 SDValue InitVal =
N->getOperand(2);
14269 SDValue VecToReduce =
N->getOperand(3);
14276 DAG.
getUNDEF(ReduceVT), InitVal, Zero);
14278 SDValue Reduce = DAG.
getNode(Opc,
DL, ReduceVT, Pred, InitVal, VecToReduce);
14287 unsigned NumElts =
N.getValueType().getVectorMinNumElements();
14291 N =
N.getOperand(0);
14294 if (
N.getValueType().getVectorMinNumElements() < NumElts)
14303 return N.getValueType().getVectorMinNumElements() >= NumElts;
14313 bool UnpredOp =
false) {
14315 assert(
N->getNumOperands() == 4 &&
"Expected 3 operand intrinsic!");
14321 return DAG.
getNode(Opc,
SDLoc(
N),
N->getValueType(0),
N->getOperand(2),
14325 N->getOperand(2),
N->getOperand(3));
14340 case Intrinsic::aarch64_neon_vcvtfxs2fp:
14341 case Intrinsic::aarch64_neon_vcvtfxu2fp:
14343 case Intrinsic::aarch64_neon_saddv:
14345 case Intrinsic::aarch64_neon_uaddv:
14347 case Intrinsic::aarch64_neon_sminv:
14349 case Intrinsic::aarch64_neon_uminv:
14351 case Intrinsic::aarch64_neon_smaxv:
14353 case Intrinsic::aarch64_neon_umaxv:
14355 case Intrinsic::aarch64_neon_fmax:
14357 N->getOperand(1),
N->getOperand(2));
14358 case Intrinsic::aarch64_neon_fmin:
14360 N->getOperand(1),
N->getOperand(2));
14361 case Intrinsic::aarch64_neon_fmaxnm:
14363 N->getOperand(1),
N->getOperand(2));
14364 case Intrinsic::aarch64_neon_fminnm:
14366 N->getOperand(1),
N->getOperand(2));
14367 case Intrinsic::aarch64_neon_smull:
14368 case Intrinsic::aarch64_neon_umull:
14369 case Intrinsic::aarch64_neon_pmull:
14370 case Intrinsic::aarch64_neon_sqdmull:
14372 case Intrinsic::aarch64_neon_sqshl:
14373 case Intrinsic::aarch64_neon_uqshl:
14374 case Intrinsic::aarch64_neon_sqshlu:
14375 case Intrinsic::aarch64_neon_srshl:
14376 case Intrinsic::aarch64_neon_urshl:
14377 case Intrinsic::aarch64_neon_sshl:
14378 case Intrinsic::aarch64_neon_ushl:
14380 case Intrinsic::aarch64_crc32b:
14381 case Intrinsic::aarch64_crc32cb:
14383 case Intrinsic::aarch64_crc32h:
14384 case Intrinsic::aarch64_crc32ch:
14386 case Intrinsic::aarch64_sve_saddv:
14388 if (
N->getOperand(2)->getValueType(0).getVectorElementType() ==
MVT::i64)
14392 case Intrinsic::aarch64_sve_uaddv:
14394 case Intrinsic::aarch64_sve_smaxv:
14396 case Intrinsic::aarch64_sve_umaxv:
14398 case Intrinsic::aarch64_sve_sminv:
14400 case Intrinsic::aarch64_sve_uminv:
14402 case Intrinsic::aarch64_sve_orv:
14404 case Intrinsic::aarch64_sve_eorv:
14406 case Intrinsic::aarch64_sve_andv:
14408 case Intrinsic::aarch64_sve_index:
14410 case Intrinsic::aarch64_sve_dup:
14412 case Intrinsic::aarch64_sve_dup_x:
14415 case Intrinsic::aarch64_sve_ext:
14417 case Intrinsic::aarch64_sve_mul:
14419 case Intrinsic::aarch64_sve_smulh:
14421 case Intrinsic::aarch64_sve_umulh:
14423 case Intrinsic::aarch64_sve_smin:
14425 case Intrinsic::aarch64_sve_umin:
14427 case Intrinsic::aarch64_sve_smax:
14429 case Intrinsic::aarch64_sve_umax:
14431 case Intrinsic::aarch64_sve_lsl:
14433 case Intrinsic::aarch64_sve_lsr:
14435 case Intrinsic::aarch64_sve_asr:
14437 case Intrinsic::aarch64_sve_fadd:
14439 case Intrinsic::aarch64_sve_fsub:
14441 case Intrinsic::aarch64_sve_fmul:
14443 case Intrinsic::aarch64_sve_add:
14445 case Intrinsic::aarch64_sve_sub:
14447 case Intrinsic::aarch64_sve_and:
14449 case Intrinsic::aarch64_sve_bic:
14451 case Intrinsic::aarch64_sve_eor:
14453 case Intrinsic::aarch64_sve_orr:
14455 case Intrinsic::aarch64_sve_sqadd:
14457 case Intrinsic::aarch64_sve_sqsub:
14459 case Intrinsic::aarch64_sve_uqadd:
14461 case Intrinsic::aarch64_sve_uqsub:
14463 case Intrinsic::aarch64_sve_sqadd_x:
14465 N->getOperand(1),
N->getOperand(2));
14466 case Intrinsic::aarch64_sve_sqsub_x:
14468 N->getOperand(1),
N->getOperand(2));
14469 case Intrinsic::aarch64_sve_uqadd_x:
14471 N->getOperand(1),
N->getOperand(2));
14472 case Intrinsic::aarch64_sve_uqsub_x:
14474 N->getOperand(1),
N->getOperand(2));
14475 case Intrinsic::aarch64_sve_cmphs:
14476 if (!
N->getOperand(2).getValueType().isFloatingPoint())
14478 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14481 case Intrinsic::aarch64_sve_cmphi:
14482 if (!
N->getOperand(2).getValueType().isFloatingPoint())
14484 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14487 case Intrinsic::aarch64_sve_fcmpge:
14488 case Intrinsic::aarch64_sve_cmpge:
14490 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14493 case Intrinsic::aarch64_sve_fcmpgt:
14494 case Intrinsic::aarch64_sve_cmpgt:
14496 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14499 case Intrinsic::aarch64_sve_fcmpeq:
14500 case Intrinsic::aarch64_sve_cmpeq:
14502 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14505 case Intrinsic::aarch64_sve_fcmpne:
14506 case Intrinsic::aarch64_sve_cmpne:
14508 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14511 case Intrinsic::aarch64_sve_fcmpuo:
14513 N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14516 case Intrinsic::aarch64_sve_fadda:
14518 case Intrinsic::aarch64_sve_faddv:
14520 case Intrinsic::aarch64_sve_fmaxnmv:
14522 case Intrinsic::aarch64_sve_fmaxv:
14524 case Intrinsic::aarch64_sve_fminnmv:
14526 case Intrinsic::aarch64_sve_fminv:
14528 case Intrinsic::aarch64_sve_sel:
14530 N->getOperand(1),
N->getOperand(2),
N->getOperand(3));
14531 case Intrinsic::aarch64_sve_cmpeq_wide:
14533 case Intrinsic::aarch64_sve_cmpne_wide:
14535 case Intrinsic::aarch64_sve_cmpge_wide:
14537 case Intrinsic::aarch64_sve_cmpgt_wide:
14539 case Intrinsic::aarch64_sve_cmplt_wide:
14541 case Intrinsic::aarch64_sve_cmple_wide:
14543 case Intrinsic::aarch64_sve_cmphs_wide:
14545 case Intrinsic::aarch64_sve_cmphi_wide:
14547 case Intrinsic::aarch64_sve_cmplo_wide:
14549 case Intrinsic::aarch64_sve_cmpls_wide:
14551 case Intrinsic::aarch64_sve_ptest_any:
14552 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14554 case Intrinsic::aarch64_sve_ptest_first:
14555 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14557 case Intrinsic::aarch64_sve_ptest_last:
14558 return getPTest(DAG,
N->getValueType(0),
N->getOperand(1),
N->getOperand(2),
14572 (
N->getOperand(0).getOpcode() ==
ISD::ABDU ||
14573 N->getOperand(0).getOpcode() ==
ISD::ABDS)) {
14574 SDNode *ABDNode =
N->getOperand(0).getNode();
14586 SDValue SplatVal,
unsigned NumVecElts) {
14597 uint64_t BaseOffset = 0;
14605 if (BasePtr->getOpcode() ==
ISD::ADD &&
14606 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
14607 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
14608 BasePtr = BasePtr->getOperand(0);
14611 unsigned Offset = EltOffset;
14612 while (--NumVecElts) {
14628 assert(ContentTy.
isSimple() &&
"No SVE containers for extended types");
14657 EVT VT =
N->getValueType(0);
14662 EVT ContainerVT = VT;
14667 SDValue Ops[] = {
N->getOperand(0),
14675 if (ContainerVT.
isInteger() && (VT != ContainerVT))
14683 EVT VT =
N->getValueType(0);
14684 EVT PtrTy =
N->getOperand(3).getValueType();
14694 auto *MINode = cast<MemIntrinsicSDNode>(
N);
14697 MINode->getOperand(3), DAG.
getUNDEF(PtrTy),
14699 MINode->getMemoryVT(), MINode->getMemOperand(),
14710 template <
unsigned Opcode>
14714 "Unsupported opcode.");
14716 EVT VT =
N->getValueType(0);
14725 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(2),
N->getOperand(3)};
14738 EVT DataVT =
Data.getValueType();
14750 if (
Data.getValueType().isFloatingPoint())
14755 SDValue Ops[] = {
N->getOperand(0),
14769 EVT DataVT =
Data.getValueType();
14770 EVT PtrTy =
N->getOperand(4).getValueType();
14779 auto *MINode = cast<MemIntrinsicSDNode>(
N);
14782 MINode->getMemoryVT(), MINode->getMemOperand(),
14812 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
14814 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
14836 if (Offset < -512 || Offset > 504)
14840 for (
int I = 0;
I < NumVecElts; ++
I) {
14852 ZeroReg = AArch64::WZR;
14855 ZeroReg = AArch64::XZR;
14879 if (NumVecElts != 4 && NumVecElts != 2)
14890 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
14892 for (
unsigned I = 0;
I < NumVecElts; ++
I) {
14908 if (IndexVal >= NumVecElts)
14910 IndexNotInserted.reset(IndexVal);
14915 if (IndexNotInserted.any())
14926 if (
S->isVolatile() ||
S->isIndexed())
14939 return ReplacedZeroSplat;
14963 S->getAlignment() <= 2)
14970 return ReplacedSplat;
14981 SDValue BasePtr =
S->getBasePtr();
14983 DAG.
getStore(
S->getChain(),
DL, SubVector0, BasePtr,
S->getPointerInfo(),
14984 S->getAlignment(),
S->getMemOperand()->getFlags());
14988 S->getPointerInfo(),
S->getAlignment(),
14989 S->getMemOperand()->getFlags());
14996 if (
N->getOperand(2).isUndef())
14997 return N->getOperand(1);
15006 EVT ResVT =
N->getValueType(0);
15028 unsigned Opc =
N->getOpcode();
15034 "Invalid opcode.");
15052 EVT ResVT =
N->getValueType(0);
15054 const auto OffsetOpc =
Offset.getOpcode();
15055 const bool OffsetIsZExt =
15057 const bool OffsetIsSExt =
15061 if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
15063 VTSDNode *ExtFrom = cast<VTSDNode>(
Offset.getOperand(2).getNode());
15069 if (ExtPg == Pg && ExtFromEVT ==
MVT::i32) {
15077 {Chain, Pg,
Base, UnextendedOffset, Ty});
15093 unsigned OpScalarSize =
Op.getScalarValueSizeInBits();
15095 unsigned ShiftImm =
N->getConstantOperandVal(1);
15096 assert(OpScalarSize > ShiftImm &&
"Invalid shift imm");
15099 APInt DemandedMask = ~ShiftedOutBits;
15116 EVT VT =
N->getValueType(0);
15121 unsigned LoadIdx = IsLaneOp ? 1 : 0;
15122 SDNode *
LD =
N->getOperand(LoadIdx).getNode();
15130 Lane =
N->getOperand(2);
15131 auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
15146 if (UI.getUse().getResNo() == 1)
15156 Addr.getNode()->use_end(); UI != UE; ++UI) {
15159 || UI.getUse().getResNo() !=
Addr.getResNo())
15165 uint32_t IncVal = CInc->getZExtValue();
15167 if (IncVal != NumBytes)
15177 Worklist.push_back(
User);
15178 Worklist.push_back(
LD);
15179 Worklist.push_back(
Vector.getNode());
15185 Ops.push_back(
LD->getOperand(0));
15188 Ops.push_back(Lane);
15190 Ops.push_back(
Addr);
15191 Ops.push_back(Inc);
15233 "Expected STORE dag node in input!");
15235 if (
auto Store = dyn_cast<StoreSDNode>(
N)) {
15236 if (!
Store->isTruncatingStore() ||
Store->isIndexed())
15239 auto ExtOpCode =
Ext.getOpcode();
15247 Store->getBasePtr(),
Store->getPointerInfo(),
15248 Store->getAlign());
15279 unsigned AddrOpIdx =
N->getNumOperands() - 1;
15284 UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
15287 UI.getUse().getResNo() !=
Addr.getResNo())
15295 Worklist.push_back(
N);
15296 Worklist.push_back(
User);
15302 bool IsStore =
false;
15303 bool IsLaneOp =
false;
15304 bool IsDupOp =
false;
15305 unsigned NewOpc = 0;
15306 unsigned NumVecs = 0;
15307 unsigned IntNo = cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
15311 NumVecs = 2;
break;
15313 NumVecs = 3;
break;
15315 NumVecs = 4;
break;
15317 NumVecs = 2; IsStore =
true;
break;
15319 NumVecs = 3; IsStore =
true;
break;
15321 NumVecs = 4; IsStore =
true;
break;
15323 NumVecs = 2;
break;
15325 NumVecs = 3;
break;
15327 NumVecs = 4;
break;
15329 NumVecs = 2; IsStore =
true;
break;
15331 NumVecs = 3; IsStore =
true;
break;
15333 NumVecs = 4; IsStore =
true;
break;
15335 NumVecs = 2; IsDupOp =
true;
break;
15337 NumVecs = 3; IsDupOp =
true;
break;
15339 NumVecs = 4; IsDupOp =
true;
break;
15341 NumVecs = 2; IsLaneOp =
true;
break;
15343 NumVecs = 3; IsLaneOp =
true;
break;
15345 NumVecs = 4; IsLaneOp =
true;
break;
15347 NumVecs = 2; IsStore =
true; IsLaneOp =
true;
break;
15349 NumVecs = 3; IsStore =
true; IsLaneOp =
true;
break;
15351 NumVecs = 4; IsStore =
true; IsLaneOp =
true;
break;
15356 VecTy =
N->getOperand(2).getValueType();
15358 VecTy =
N->getValueType(0);
15363 uint32_t IncVal = CInc->getZExtValue();
15365 if (IsLaneOp || IsDupOp)
15367 if (IncVal != NumBytes)
15372 Ops.push_back(
N->getOperand(0));
15374 if (IsLaneOp || IsStore)
15375 for (
unsigned i = 2;
i < AddrOpIdx; ++
i)
15376 Ops.push_back(
N->getOperand(
i));
15377 Ops.push_back(
Addr);
15378 Ops.push_back(Inc);
15382 unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
15384 for (
n = 0;
n < NumResultVecs; ++
n)
15396 std::vector<SDValue> NewResults;
15397 for (
unsigned i = 0;
i < NumResultVecs; ++
i) {
15400 NewResults.push_back(
SDValue(UpdN.
getNode(), NumResultVecs + 1));
15447 1LL << (width - 1);
15517 int CompConstant) {
15521 int MaxUInt = (1 << width);
15529 AddConstant -= (1 << (width-1));
15534 if ((AddConstant == 0) ||
15535 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
15536 (AddConstant >= 0 && CompConstant < 0) ||
15537 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
15542 if ((AddConstant == 0) ||
15543 (AddConstant >= 0 && CompConstant <= 0) ||
15544 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
15549 if ((AddConstant >= 0 && CompConstant < 0) ||
15550 (AddConstant <= 0 && CompConstant >= -1 &&
15551 CompConstant < AddConstant + MaxUInt))
15556 if ((AddConstant == 0) ||
15557 (AddConstant > 0 && CompConstant <= 0) ||
15558 (AddConstant < 0 && CompConstant <= AddConstant))
15563 if ((AddConstant >= 0 && CompConstant <= 0) ||
15564 (AddConstant <= 0 && CompConstant >= 0 &&
15565 CompConstant <= AddConstant + MaxUInt))
15570 if ((AddConstant > 0 && CompConstant < 0) ||
15571 (AddConstant < 0 && CompConstant >= 0 &&
15572 CompConstant < AddConstant + MaxUInt) ||
15573 (AddConstant >= 0 && CompConstant >= 0 &&
15574 CompConstant >= AddConstant) ||
15575 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
15594 unsigned CmpIndex) {
15595 unsigned CC = cast<ConstantSDNode>(
N->getOperand(CCIndex))->getSExtValue();
15596 SDNode *SubsNode =
N->getOperand(CmpIndex).getNode();
15597 unsigned CondOpcode = SubsNode->
getOpcode();
15606 unsigned MaskBits = 0;
15612 uint32_t CNV = CN->getZExtValue();
15615 else if (CNV == 65535)
15636 if (!isa<ConstantSDNode>(AddInputValue2.
getNode()) ||
15637 !isa<ConstantSDNode>(SubsInputValue.
getNode()))
15648 cast<ConstantSDNode>(AddInputValue2.
getNode())->getSExtValue(),
15649 cast<ConstantSDNode>(SubsInputValue.
getNode())->getSExtValue()))
15682 assert(isa<ConstantSDNode>(CCVal) &&
"Expected a ConstantSDNode here!");
15683 unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
15700 "Expected the value type to be the same for both operands!");
15732 if (
N->getOperand(0) ==
N->getOperand(1))
15733 return N->getOperand(0);
15752 auto *OpCC = cast<ConstantSDNode>(LHS.
getOperand(2));
15769 "Unexpected opcode!");
15795 if (!
Op->hasOneUse())
15805 Bit < Op->getValueType(0).getSizeInBits()) {
15811 Bit < Op->getOperand(0).getValueSizeInBits()) {
15815 if (
Op->getNumOperands() != 2)
15818 auto *
C = dyn_cast<ConstantSDNode>(
Op->getOperand(1));
15822 switch (
Op->getOpcode()) {
15828 if ((
C->getZExtValue() >>
Bit) & 1)
15834 if (
C->getZExtValue() <=
Bit &&
15835 (
Bit -
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
15836 Bit =
Bit -
C->getZExtValue();
15843 Bit =
Bit +
C->getZExtValue();
15844 if (
Bit >=
Op->getValueType(0).getSizeInBits())
15845 Bit =
Op->getValueType(0).getSizeInBits() - 1;
15850 if ((
Bit +
C->getZExtValue()) <
Op->getValueType(0).getSizeInBits()) {
15851 Bit =
Bit +
C->getZExtValue();
15858 if ((
C->getZExtValue() >>
Bit) & 1)
15868 unsigned Bit = cast<ConstantSDNode>(
N->getOperand(2))->getZExtValue();
15869 bool Invert =
false;
15870 SDValue TestSrc =
N->getOperand(1);
15873 if (TestSrc == NewTestSrc)
15876 unsigned NewOpc =
N->getOpcode();
15909 SDNode *SplatLHS =
N->getOperand(1).getNode();
15910 SDNode *SplatRHS =
N->getOperand(2).getNode();
15912 if (CmpLHS.
getValueType() ==
N->getOperand(1).getValueType() &&
15938 EVT ResVT =
N->getValueType(0);
15945 SDValue IfTrue =
N->getOperand(1);
15946 SDValue IfFalse =
N->getOperand(2);
15949 cast<CondCodeSDNode>(N0.
getOperand(2))->get());
15962 EVT ResVT =
N->getValueType(0);
15974 "Scalar-SETCC feeding SELECT has unexpected result type!");
15987 if (!ResVT.
isVector() || NumMaskElts == 0)
16023 if (
N->getValueType(0) ==
N->getOperand(0).getValueType())
16024 return N->getOperand(0);
16035 auto *GN = cast<GlobalAddressSDNode>(
N);
16040 uint64_t MinOffset = -1ull;
16044 auto *
C = dyn_cast<ConstantSDNode>(
N->getOperand(0));
16046 C = dyn_cast<ConstantSDNode>(
N->getOperand(1));
16049 MinOffset =
std::min(MinOffset,
C->getZExtValue());
16067 if (
Offset >= (1 << 21))
16072 if (!
T->isSized() ||
16087 "This method is only for scalable vectors of offsets");
16103 unsigned ScalarSizeInBytes) {
16105 if (OffsetInBytes % ScalarSizeInBytes)
16109 if (OffsetInBytes / ScalarSizeInBytes > 31)
16123 unsigned ScalarSizeInBytes) {
16131 bool OnlyPackedOffsets =
true) {
16132 const SDValue Src =
N->getOperand(2);
16135 "Scatter stores are only possible for SVE vectors");
16192 if (!TLI.isTypeLegal(
Base.getValueType()))
16198 if (!OnlyPackedOffsets &&
16202 if (!TLI.isTypeLegal(
Offset.getValueType()))
16223 SDValue Ops[] = {
N->getOperand(0),
16230 return DAG.
getNode(Opcode,
DL, VTs, Ops);
16235 bool OnlyPackedOffsets =
true) {
16236 const EVT RetVT =
N->getValueType(0);
16238 "Gather loads are only possible for SVE vectors");
16268 Offset.getValueType().isVector())
16295 if (!TLI.isTypeLegal(
Base.getValueType()))
16301 if (!OnlyPackedOffsets &&
16316 SDValue Ops[] = {
N->getOperand(0),
16323 if (RetVT.
isInteger() && (RetVT != HwRetVt))
16356 auto VT = cast<VTSDNode>(
N->getOperand(1))->getVT();
16361 "Sign extending from an invalid type");
16380 unsigned MemVTOpNum = 4;
16443 EVT SignExtSrcVT = cast<VTSDNode>(
N->getOperand(1))->getVT();
16444 EVT SrcMemVT = cast<VTSDNode>(Src->
getOperand(MemVTOpNum))->getVT();
16446 if ((SignExtSrcVT != SrcMemVT) || !Src.
hasOneUse())
16449 EVT DstVT =
N->getValueType(0);
16468 const unsigned OffsetPos = 4;
16480 Ops[OffsetPos] =
Offset;
16491 unsigned ScalarSizeInBytes) {
16492 const unsigned ImmPos = 4, OffsetPos = 3;
16499 std::swap(Ops[ImmPos], Ops[OffsetPos]);
16503 Ops[1] = DAG.
getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index,
DL,
16512 switch (
Op.getOpcode()) {
16536 SDValue InsertVec =
N->getOperand(0);
16537 SDValue InsertElt =
N->getOperand(1);
16538 SDValue InsertIdx =
N->getOperand(2);
16579 EVT Ty =
N->getValueType(0);
16603 switch (
N->getOpcode()) {
16697 switch (cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue()) {
16698 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
16700 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
16702 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
16704 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
16706 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
16707 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
16708 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
16709 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
16710 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
16711 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
16712 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
16713 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
16715 case Intrinsic::aarch64_neon_ld2:
16716 case Intrinsic::aarch64_neon_ld3:
16717 case Intrinsic::aarch64_neon_ld4:
16718 case Intrinsic::aarch64_neon_ld1x2:
16719 case Intrinsic::aarch64_neon_ld1x3:
16720 case Intrinsic::aarch64_neon_ld1x4:
16721 case Intrinsic::aarch64_neon_ld2lane:
16722 case Intrinsic::aarch64_neon_ld3lane:
16723 case Intrinsic::aarch64_neon_ld4lane:
16724 case Intrinsic::aarch64_neon_ld2r:
16725 case Intrinsic::aarch64_neon_ld3r:
16726 case Intrinsic::aarch64_neon_ld4r:
16727 case Intrinsic::aarch64_neon_st2:
16728 case Intrinsic::aarch64_neon_st3:
16729 case Intrinsic::aarch64_neon_st4:
16730 case Intrinsic::aarch64_neon_st1x2:
16731 case Intrinsic::aarch64_neon_st1x3:
16732 case Intrinsic::aarch64_neon_st1x4:
16733 case Intrinsic::aarch64_neon_st2lane:
16734 case Intrinsic::aarch64_neon_st3lane:
16735 case Intrinsic::aarch64_neon_st4lane:
16737 case Intrinsic::aarch64_sve_ldnt1:
16739 case Intrinsic::aarch64_sve_ld1rq:
16740 return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(
N, DAG);
16741 case Intrinsic::aarch64_sve_ld1ro:
16742 return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(
N, DAG);
16743 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
16745 case Intrinsic::aarch64_sve_ldnt1_gather:
16747 case Intrinsic::aarch64_sve_ldnt1_gather_index:
16750 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
16752 case Intrinsic::aarch64_sve_ld1:
16754 case Intrinsic::aarch64_sve_ldnf1:
16756 case Intrinsic::aarch64_sve_ldff1:
16758 case Intrinsic::aarch64_sve_st1:
16760 case Intrinsic::aarch64_sve_stnt1:
16762 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
16764 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
16766 case Intrinsic::aarch64_sve_stnt1_scatter:
16768 case Intrinsic::aarch64_sve_stnt1_scatter_index:
16770 case Intrinsic::aarch64_sve_ld1_gather:
16772 case Intrinsic::aarch64_sve_ld1_gather_index:
16775 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
16778 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
16781 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
16785 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
16789 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
16791 case Intrinsic::aarch64_sve_ldff1_gather:
16793 case Intrinsic::aarch64_sve_ldff1_gather_index:
16796 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
16800 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
16804 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
16808 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
16812 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
16815 case Intrinsic::aarch64_sve_st1_scatter:
16817 case Intrinsic::aarch64_sve_st1_scatter_index:
16819 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
16822 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
16825 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
16829 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
16833 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
16835 case Intrinsic::aarch64_sve_tuple_get: {
16841 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
16842 EVT ResVT =
N->getValueType(0);
16849 case Intrinsic::aarch64_sve_tuple_set: {
16859 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
16860 uint64_t NumLanes =
16863 if ((TupleLanes % NumLanes) != 0)
16866 uint64_t NumVecs = TupleLanes / NumLanes;
16869 for (
unsigned I = 0;
I < NumVecs; ++
I) {
16871 Opnds.push_back(Vec);
16882 case Intrinsic::aarch64_sve_tuple_create2:
16883 case Intrinsic::aarch64_sve_tuple_create3:
16884 case Intrinsic::aarch64_sve_tuple_create4: {
16889 for (
unsigned I = 2;
I <
N->getNumOperands(); ++
I)
16890 Opnds.push_back(
N->getOperand(
I));
16892 EVT VT = Opnds[0].getValueType();
16896 (
N->getNumOperands() - 2));
16900 case Intrinsic::aarch64_sve_ld2:
16901 case Intrinsic::aarch64_sve_ld3:
16902 case Intrinsic::aarch64_sve_ld4: {
16906 SDValue BasePtr =
N->getOperand(3);
16908 unsigned IntrinsicID =
16909 cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
16911 LowerSVEStructLoad(IntrinsicID, LoadOps,
N->getValueType(0), DAG,
DL);
16914 case Intrinsic::aarch64_rndr:
16915 case Intrinsic::aarch64_rndrrs: {
16916 unsigned IntrinsicID =
16917 cast<ConstantSDNode>(
N->getOperand(1))->getZExtValue();
16919 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
16920 : AArch64SysReg::RNDRRS);
16946 bool AArch64TargetLowering::isUsedByReturnOnly(
SDNode *
N,
16948 if (
N->getNumValues() != 1)
16950 if (!
N->hasNUsesOfValue(1, 0))
16954 SDNode *Copy = *
N->use_begin();
16965 bool HasRet =
false;
16983 bool AArch64TargetLowering::mayBeEmittedAsTailCall(
const CallInst *CI)
const {
16995 Base =
Op->getOperand(0);
16999 int64_t RHSC = RHS->getSExtValue();
17001 RHSC = -(uint64_t)RHSC;
17002 if (!isInt<9>(RHSC))
17018 VT =
LD->getMemoryVT();
17019 Ptr =
LD->getBasePtr();
17021 VT =
ST->getMemoryVT();
17022 Ptr =
ST->getBasePtr();
17033 bool AArch64TargetLowering::getPostIndexedAddressParts(
17039 VT =
LD->getMemoryVT();
17040 Ptr =
LD->getBasePtr();
17042 VT =
ST->getMemoryVT();
17043 Ptr =
ST->getBasePtr();
17048 if (!getIndexedAddressParts(
Op,
Base,
Offset, AM, IsInc, DAG))
17058 void AArch64TargetLowering::ReplaceBITCASTResults(
17062 EVT VT =
N->getValueType(0);
17063 EVT SrcVT =
Op.getValueType();
17067 "Expected fp->int bitcast!");
17088 unsigned AcrossOp) {
17105 return std::make_pair(
Lo,
Hi);
17108 void AArch64TargetLowering::ReplaceExtractSubVectorResults(
17111 EVT InVT =
In.getValueType();
17118 EVT VT =
N->getValueType(0);
17127 auto *CIndex = dyn_cast<ConstantSDNode>(
N->getOperand(1));
17131 unsigned Index = CIndex->getZExtValue();
17155 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
17165 "AtomicCmpSwap on types less than 128 should be legal");
17179 switch (
MemOp->getMergedOrdering()) {
17180 case AtomicOrdering::Monotonic:
17181 Opcode = AArch64::CASPX;
17183 case AtomicOrdering::Acquire:
17184 Opcode = AArch64::CASPAX;
17187 Opcode = AArch64::CASPLX;
17189 case AtomicOrdering::AcquireRelease:
17190 case AtomicOrdering::SequentiallyConsistent:
17191 Opcode = AArch64::CASPALX;
17201 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
17215 switch (
MemOp->getMergedOrdering()) {
17216 case AtomicOrdering::Monotonic:
17217 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
17219 case AtomicOrdering::Acquire:
17220 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
17223 Opcode = AArch64::CMP_SWAP_128_RELEASE;
17225 case AtomicOrdering::AcquireRelease:
17226 case AtomicOrdering::SequentiallyConsistent:
17227 Opcode = AArch64::CMP_SWAP_128;
17235 SDValue Ops[] = {
N->getOperand(1), Desired.first, Desired.second,
17236 New.first, New.second,
N->getOperand(0)};
17247 void AArch64TargetLowering::ReplaceNodeResults(
17249 switch (
N->getOpcode()) {
17253 ReplaceBITCASTResults(
N,
Results, DAG);
17287 assert(
N->getValueType(0) ==
MVT::i128 &&
"unexpected illegal conversion");
17295 "unexpected load's value type");
17305 DAG.
getVTList({MVT::i64, MVT::i64, MVT::Other}),
17306 {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->
getMemoryVT(),
17315 ReplaceExtractSubVectorResults(
N,
Results, DAG);
17322 EVT VT =
N->getValueType(0);
17324 "custom lowering for unexpected type");
17331 case Intrinsic::aarch64_sve_clasta_n: {
17335 N->getOperand(1), Op2,
N->getOperand(3));
17339 case Intrinsic::aarch64_sve_clastb_n: {
17343 N->getOperand(1), Op2,
N->getOperand(3));
17347 case Intrinsic::aarch64_sve_lasta: {
17350 N->getOperand(1),
N->getOperand(2));
17354 case Intrinsic::aarch64_sve_lastb: {
17357 N->getOperand(1),
N->getOperand(2));
17372 unsigned AArch64TargetLowering::combineRepeatedFPDivisors()
const {
17393 unsigned Size =
SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
17394 return Size == 128;
17418 if (Subtarget->
hasLSE())
17481 IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
17495 Type *Tys[] = {
Addr->getType() };
17497 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
17504 return Builder.CreateBitCast(Trunc, ValueTy);
17524 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
17535 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
17536 Type *Tys[] = {
Addr->getType() };
17541 Val =
Builder.CreateBitCast(Val, IntValTy);
17543 return Builder.CreateCall(Stxr,
17544 {
Builder.CreateZExtOrBitCast(
17563 bool AArch64TargetLowering::shouldNormalizeToSelectSequence(
LLVMContext &,
17597 M.getOrInsertGlobal(
"__security_cookie",
17606 F->addAttribute(1, Attribute::AttrKind::InReg);
17616 return M.getGlobalVariable(
"__security_cookie");
17623 return M.getFunction(
"__security_check_cookie");
17653 return Mask->getValue().isPowerOf2();
17659 unsigned OldShiftOpcode,
unsigned NewShiftOpcode,
17663 X,
XC, CC,
Y, OldShiftOpcode, NewShiftOpcode, DAG))
17666 return X.getValueType().isScalarInteger() || NewShiftOpcode ==
ISD::SHL;
17687 const MCPhysReg *IStart =
TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
17696 if (AArch64::GPR64RegClass.
contains(*
I))
17697 RC = &AArch64::GPR64RegClass;
17698 else if (AArch64::FPR64RegClass.
contains(*
I))
17699 RC = &AArch64::FPR64RegClass;
17709 assert(Entry->getParent()->getFunction().hasFnAttribute(
17710 Attribute::NoUnwind) &&
17711 "Function should be nounwind in insertCopiesSplitCSR!");
17712 Entry->addLiveIn(*
I);
17717 for (
auto *Exit : Exits)
17719 TII->get(TargetOpcode::COPY), *
I)
17753 void AArch64TargetLowering::finalizeLowering(
MachineFunction &MF)
const {
17763 bool AArch64TargetLowering::shouldLocalize(
17765 switch (
MI.getOpcode()) {
17766 case TargetOpcode::G_GLOBAL_VALUE: {
17778 case AArch64::G_ADD_LOW:
17787 if (isa<ScalableVectorType>(Inst.
getType()))
17794 if (
const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
17795 if (isa<ScalableVectorType>(AI->getAllocatedType()))
17806 "Expected legal fixed length vector!");
17832 "Expected legal fixed length vector!");
17839 PgPattern = AArch64SVEPredPattern::vl1;
17842 PgPattern = AArch64SVEPredPattern::vl2;
17845 PgPattern = AArch64SVEPredPattern::vl4;
17848 PgPattern = AArch64SVEPredPattern::vl8;
17851 PgPattern = AArch64SVEPredPattern::vl16;
17854 PgPattern = AArch64SVEPredPattern::vl32;
17857 PgPattern = AArch64SVEPredPattern::vl64;
17860 PgPattern = AArch64SVEPredPattern::vl128;
17863 PgPattern = AArch64SVEPredPattern::vl256;
17899 "Expected legal scalable vector!");
17914 "Expected to convert into a scalable vector!");
17916 "Expected a fixed length vector operand!");
17925 "Expected to convert into a fixed length vector!");
17927 "Expected a scalable vector operand!");
17934 SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
17936 auto Load = cast<LoadSDNode>(
Op);
17939 EVT VT =
Op.getValueType();
17943 ContainerVT,
DL,
Load->getChain(),
Load->getBasePtr(),
Load->getOffset(),
17945 Load->getMemoryVT(),
Load->getMemOperand(),
Load->getAddressingMode(),
17946 Load->getExtensionType());
17956 EVT InVT =
Mask.getValueType();
17963 EVT CmpVT = Pg.getValueType();
17969 SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
17971 auto Load = cast<MaskedLoadSDNode>(
Op);
17977 EVT VT =
Op.getValueType();
17983 bool IsPassThruZeroOrUndef =
false;
17985 if (
Load->getPassThru()->isUndef()) {
17986 PassThru = DAG.
getUNDEF(ContainerVT);
17987 IsPassThruZeroOrUndef =
true;
17994 IsPassThruZeroOrUndef =
true;
17998 ContainerVT,
DL,
Load->getChain(),
Load->getBasePtr(),
Load->getOffset(),
17999 Mask, PassThru,
Load->getMemoryVT(),
Load->getMemOperand(),
18000 Load->getAddressingMode(),
Load->getExtensionType());
18002 if (!IsPassThruZeroOrUndef) {
18005 NewLoad = DAG.
getSelect(
DL, ContainerVT,
Mask, NewLoad, OldPassThru);
18014 SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
18016 auto Store = cast<StoreSDNode>(
Op);
18019 EVT VT =
Store->getValue().getValueType();
18026 Store->getMemOperand(),
Store->getAddressingMode(),
18027 Store->isTruncatingStore());
18030 SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
18032 auto Store = cast<MaskedStoreSDNode>(
Op);
18034 if (
Store->isTruncatingStore())
18038 EVT VT =
Store->getValue().getValueType();
18047 Store->getAddressingMode(),
Store->isTruncatingStore());
18050 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
18053 EVT VT =
Op.getValueType();
18061 return LowerToPredicatedOp(
Op, DAG, PredOpcode,
true);
18086 SDValue Op0Lo = DAG.
getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
18087 SDValue Op1Lo = DAG.
getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
18088 SDValue Op0Hi = DAG.
getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
18089 SDValue Op1Hi = DAG.
getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
18105 ResultLo, ResultHi);
18110 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
18112 EVT VT =
Op.getValueType();
18146 SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
18148 EVT VT =
Op.getValueType();
18182 SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
18184 EVT VT =
Op.getValueType();
18185 EVT InVT =
Op.getOperand(0).getValueType();
18195 SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
18197 EVT VT =
Op.getValueType();
18201 EVT InVT =
Op.getOperand(0).getValueType();
18206 Op.getOperand(1),
Op.getOperand(2));
18217 bool OverrideNEON)
const {
18218 EVT VT =
Op.getValueType();
18222 if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
18227 for (
const SDValue &V :
Op->op_values()) {
18228 if (isa<CondCodeSDNode>(V)) {
18233 if (
const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
18241 "Only fixed length vectors are supported!");
18255 for (
const SDValue &V :
Op->op_values()) {
18258 "Only scalable vectors are supported!");
18273 EVT VT =
Op.getValueType();
18274 assert(useSVEForFixedLengthVectorVT(VT) &&
18275 "Only expected to lower fixed length vector operation!");
18280 for (
const SDValue &V :
Op->op_values()) {
18281 assert(!isa<VTSDNode>(V) &&
"Unexpected VTSDNode node!");
18291 "Only fixed length vectors are supported!");
18295 auto ScalableRes = DAG.
getNode(
Op.getOpcode(),
SDLoc(
Op), ContainerVT, Ops);
18299 SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(
SDValue ScalarOp,
18307 EVT ContainerVT = SrcVT;
18318 DAG.
getUNDEF(ContainerVT), AccOp, Zero);
18327 SDValue AArch64TargetLowering::LowerPredReductionToSVE(
SDValue ReduceOp,
18331 EVT OpVT =
Op.getValueType();
18360 SDValue AArch64TargetLowering::LowerReductionToSVE(
unsigned Opcode,
18367 if (useSVEForFixedLengthVectorVT(SrcVT,
true)) {
18392 AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(
SDValue Op,
18394 EVT VT =
Op.getValueType();
18397 EVT InVT =
Op.getOperand(1).getValueType();
18404 EVT MaskVT =
Op.getOperand(0).getValueType();
18416 SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
18419 EVT InVT =
Op.getOperand(0).getValueType();
18422 assert(useSVEForFixedLengthVectorVT(InVT) &&
18423 "Only expected to lower fixed length vector operation!");
18425 "Expected integer result of the same bit length as the inputs!");
18433 {Pg, Op1, Op2,
Op.getOperand(2)});
18441 AArch64TargetLowering::LowerFixedLengthBitcastToSVE(
SDValue Op,
18444 auto SrcOp =
Op.getOperand(0);
18445 EVT VT =
Op.getValueType();
18447 EVT ContainerSrcVT =
18455 SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
18458 unsigned NumOperands =
Op->getNumOperands();
18461 "Unexpected number of operands in CONCAT_VECTORS");
18463 auto SrcOp1 =
Op.getOperand(0);
18464 auto SrcOp2 =
Op.getOperand(1);
18465 EVT VT =
Op.getValueType();
18466 EVT SrcVT = SrcOp1.getValueType();
18468 if (NumOperands > 2) {
18471 for (
unsigned I = 0;
I < NumOperands;
I += 2)
18473 Op->getOperand(
I),
Op->getOperand(
I + 1)));
18490 AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(
SDValue Op,
18492 EVT VT =
Op.getValueType();
18507 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
18509 Pg, Val, DAG.
getUNDEF(ContainerVT));
18515 AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(
SDValue Op,
18517 EVT VT =
Op.getValueType();
18539 AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(
SDValue Op,
18541 EVT VT =
Op.getValueType();
18565 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
18575 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
18584 AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(
SDValue Op,
18586 EVT VT =
Op.getValueType();
18609 Val = getSVESafeBitCast(CvtVT, Val, DAG);
18610 Val = DAG.
getNode(Opcode,
DL, ContainerDstVT, Pg, Val,
18627 SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
18629 EVT VT =
Op.getValueType();
18632 auto *SVN = cast<ShuffleVectorSDNode>(
Op.getNode());
18633 auto ShuffleMask = SVN->
getMask();
18643 bool ReverseEXT =
false;
18645 if (
isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
18666 EVT InVT =
Op.getValueType();
18672 "Only expect to cast between legal scalable vector types!");
18675 "Cannot cast between data and predicate scalable vector types!");
18687 if (InVT != PackedInVT)
18693 if (VT != PackedVT)
18707 bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
18709 const APInt &OriginalDemandedElts,
KnownBits &Known, TargetLoweringOpt &TLO,
18710 unsigned Depth)
const {
18712 unsigned Opc =
Op.getOpcode();
18729 if (ShiftRBits != ShiftLBits)
18732 unsigned ScalarSize =
Op.getScalarValueSizeInBits();
18733 assert(ScalarSize > ShiftLBits &&
"Invalid shift imm");
18736 APInt UnusedBits = ~OriginalDemandedBits;
18738 if ((ZeroBits & UnusedBits) != ZeroBits)
18748 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO,
Depth);
18751 bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
18752 unsigned Opc,
LLT Ty1,
LLT Ty2)
const {